Index: head/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S
===================================================================
--- head/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S (revision 322854)
+++ head/contrib/compiler-rt/lib/builtins/arm/aeabi_dcmp.S (revision 322855)
@@ -1,43 +1,52 @@
//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../assembly.h"
// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
// int result = __{eq,lt,le,ge,gt}df2(a, b);
// if (result {==,<,<=,>=,>} 0) {
// return 1;
// } else {
// return 0;
// }
// }
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \
+ vmov d0, r0, r1 SEPARATOR \
+ vmov d1, r2, r3
+#else
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+#endif
+
#define DEFINE_AEABI_DCMP(cond) \
.syntax unified SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
push { r4, lr } SEPARATOR \
+ CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
DEFINE_AEABI_DCMP(eq)
DEFINE_AEABI_DCMP(lt)
DEFINE_AEABI_DCMP(le)
DEFINE_AEABI_DCMP(ge)
DEFINE_AEABI_DCMP(gt)
NO_EXEC_STACK_DIRECTIVE
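
For context: on a hard-float target the __aeabi_dcmp* entry points still receive their operands in core registers (r0:r1 and r2:r3) per the ARM RTABI, while __{eq,lt,le,ge,gt}df2 built for that ABI expect them in d0/d1; the added vmov pair bridges the two conventions. A minimal C++ model of what each generated stub computes, under the assumption that __eqdf2 follows the usual compiler-rt/libgcc convention of returning 0 when its operands compare equal (the register marshaling itself is invisible at this level):

// Semantic model only: the register moves that motivated this change live
// below the C++ level. __eqdf2 is the comparison helper provided by
// compiler-rt (or libgcc); by convention it returns 0 when a == b.
extern "C" int __eqdf2(double, double);

extern "C" int aeabi_dcmpeq_model(double a, double b) {
  return __eqdf2(a, b) == 0 ? 1 : 0;   // 1 = condition holds, 0 = it does not
}

int main() { return aeabi_dcmpeq_model(1.0, 1.0) == 1 ? 0 : 1; }

The fcmp change in the next file is the same pattern for single precision, using vmov s0, r0 and vmov s1, r1.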
Index: head/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S
===================================================================
--- head/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S (revision 322854)
+++ head/contrib/compiler-rt/lib/builtins/arm/aeabi_fcmp.S (revision 322855)
@@ -1,43 +1,52 @@
//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../assembly.h"
// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
// int result = __{eq,lt,le,ge,gt}sf2(a, b);
// if (result {==,<,<=,>=,>} 0) {
// return 1;
// } else {
// return 0;
// }
// }
+#if defined(COMPILER_RT_ARMHF_TARGET)
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \
+ vmov s0, r0 SEPARATOR \
+ vmov s1, r1
+#else
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+#endif
+
#define DEFINE_AEABI_FCMP(cond) \
.syntax unified SEPARATOR \
.p2align 2 SEPARATOR \
DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
push { r4, lr } SEPARATOR \
+ CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \
bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
DEFINE_AEABI_FCMP(eq)
DEFINE_AEABI_FCMP(lt)
DEFINE_AEABI_FCMP(le)
DEFINE_AEABI_FCMP(ge)
DEFINE_AEABI_FCMP(gt)
NO_EXEC_STACK_DIRECTIVE
Index: head/contrib/compiler-rt/lib/esan/esan_sideline_linux.cpp
===================================================================
--- head/contrib/compiler-rt/lib/esan/esan_sideline_linux.cpp (revision 322854)
+++ head/contrib/compiler-rt/lib/esan/esan_sideline_linux.cpp (revision 322855)
@@ -1,177 +1,177 @@
//===-- esan_sideline_linux.cpp ---------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners.
//
// Support for a separate or "sideline" tool thread on Linux.
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_LINUX
#include "esan_sideline.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_linux.h"
#include <errno.h>
#include <sched.h>
#include <sys/prctl.h>
#include <sys/signal.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
namespace __esan {
static const int SigAltStackSize = 4*1024;
static const int SidelineStackSize = 4*1024;
static const uptr SidelineIdUninitialized = 1;
// FIXME: we'll need some kind of TLS (can we trust that a pthread key will
// work in our non-POSIX thread?) to access our data in our signal handler
// with multiple sideline threads. For now we assume there is only one
// sideline thread and we use a dirty solution of a global var.
static SidelineThread *TheThread;
// We aren't passing SA_NODEFER so the same signal is blocked while here.
void SidelineThread::handleSidelineSignal(int SigNum, void *SigInfo,
void *Ctx) {
VPrintf(3, "Sideline signal %d\n", SigNum);
CHECK_EQ(SigNum, SIGALRM);
// See above about needing TLS to avoid this global var.
SidelineThread *Thread = TheThread;
if (atomic_load(&Thread->SidelineExit, memory_order_relaxed) != 0)
return;
Thread->sampleFunc(Thread->FuncArg);
}
void SidelineThread::registerSignal(int SigNum) {
__sanitizer_sigaction SigAct;
internal_memset(&SigAct, 0, sizeof(SigAct));
SigAct.sigaction = handleSidelineSignal;
// We do not pass SA_NODEFER as we want to block the same signal.
SigAct.sa_flags = SA_ONSTACK | SA_SIGINFO;
int Res = internal_sigaction(SigNum, &SigAct, nullptr);
CHECK_EQ(Res, 0);
}
int SidelineThread::runSideline(void *Arg) {
VPrintf(1, "Sideline thread starting\n");
SidelineThread *Thread = static_cast<SidelineThread*>(Arg);
// If the parent dies, we want to exit also.
internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
// Set up a signal handler on an alternate stack for safety.
InternalScopedBuffer<char> StackMap(SigAltStackSize);
-  struct sigaltstack SigAltStack;
+  stack_t SigAltStack;
SigAltStack.ss_sp = StackMap.data();
SigAltStack.ss_size = SigAltStackSize;
SigAltStack.ss_flags = 0;
internal_sigaltstack(&SigAltStack, nullptr);
// We inherit the signal mask from the app thread. In case
// we weren't created at init time, we ensure the mask is empty.
__sanitizer_sigset_t SigSet;
internal_sigfillset(&SigSet);
int Res = internal_sigprocmask(SIG_UNBLOCK, &SigSet, nullptr);
CHECK_EQ(Res, 0);
registerSignal(SIGALRM);
bool TimerSuccess = Thread->adjustTimer(Thread->Freq);
CHECK(TimerSuccess);
// We loop, doing nothing but handling itimer signals.
while (atomic_load(&TheThread->SidelineExit, memory_order_relaxed) == 0)
sched_yield();
if (!Thread->adjustTimer(0))
VPrintf(1, "Failed to disable timer\n");
VPrintf(1, "Sideline thread exiting\n");
return 0;
}
bool SidelineThread::launchThread(SidelineFunc takeSample, void *Arg,
u32 FreqMilliSec) {
// This can only be called once. However, we can't clear a field in
// the constructor and check for that here as the constructor for
// a static instance is called *after* our module_ctor and thus after
// this routine! Thus we rely on the TheThread check below.
CHECK(TheThread == nullptr); // Only one sideline thread is supported.
TheThread = this;
sampleFunc = takeSample;
FuncArg = Arg;
Freq = FreqMilliSec;
atomic_store(&SidelineExit, 0, memory_order_relaxed);
// We do without a guard page.
Stack = static_cast<char*>(MmapOrDie(SidelineStackSize, "SidelineStack"));
// We need to handle the return value from internal_clone() not having been
// assigned yet (for our CHECK in adjustTimer()) so we ensure this has a
// sentinel value.
SidelineId = SidelineIdUninitialized;
// By omitting CLONE_THREAD, the child is in its own thread group and will not
// receive any of the application's signals.
SidelineId = internal_clone(
runSideline, Stack + SidelineStackSize,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED,
this, nullptr /* parent_tidptr */,
nullptr /* newtls */, nullptr /* child_tidptr */);
int ErrCode;
if (internal_iserror(SidelineId, &ErrCode)) {
Printf("FATAL: EfficiencySanitizer failed to spawn a thread (code %d).\n",
ErrCode);
Die();
return false; // Not reached.
}
return true;
}
bool SidelineThread::joinThread() {
VPrintf(1, "Joining sideline thread\n");
bool Res = true;
atomic_store(&SidelineExit, 1, memory_order_relaxed);
while (true) {
uptr Status = internal_waitpid(SidelineId, nullptr, __WALL);
int ErrCode;
if (!internal_iserror(Status, &ErrCode))
break;
if (ErrCode == EINTR)
continue;
VPrintf(1, "Failed to join sideline thread (errno %d)\n", ErrCode);
Res = false;
break;
}
UnmapOrDie(Stack, SidelineStackSize);
return Res;
}
// Must be called from the sideline thread itself.
bool SidelineThread::adjustTimer(u32 FreqMilliSec) {
// The return value of internal_clone() may not have been assigned yet:
CHECK(internal_getpid() == SidelineId ||
SidelineId == SidelineIdUninitialized);
Freq = FreqMilliSec;
struct itimerval TimerVal;
TimerVal.it_interval.tv_sec = (time_t) Freq / 1000;
TimerVal.it_interval.tv_usec = (time_t) (Freq % 1000) * 1000;
TimerVal.it_value.tv_sec = (time_t) Freq / 1000;
TimerVal.it_value.tv_usec = (time_t) (Freq % 1000) * 1000;
// As we're in a different thread group, we cannot use either
// ITIMER_PROF or ITIMER_VIRTUAL without taking up scheduled
// time ourselves: thus we must use real time.
int Res = setitimer(ITIMER_REAL, &TimerVal, nullptr);
return (Res == 0);
}
} // namespace __esan
#endif // SANITIZER_LINUX
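
The one-line change above tracks glibc's removal of the legacy "struct sigaltstack" tag in favor of the POSIX stack_t typedef. A small self-contained sketch of the same pattern the sideline thread uses, with plain POSIX calls standing in for the sanitizer's internal_* wrappers (all names here are illustrative):

#include <csignal>
#include <sys/time.h>
#include <unistd.h>

static volatile sig_atomic_t Ticks = 0;
static void onAlarm(int) { ++Ticks; }   // sampling work would go here

int main() {
  static char AltStack[16 * 1024];
  stack_t SS = {};                 // POSIX spells the type stack_t; newer
  SS.ss_sp = AltStack;             // glibc dropped the struct tag entirely,
  SS.ss_size = sizeof AltStack;    // which is what this revision fixes.
  SS.ss_flags = 0;
  sigaltstack(&SS, nullptr);

  struct sigaction SA = {};
  SA.sa_handler = onAlarm;
  SA.sa_flags = SA_ONSTACK;        // deliver on the alternate stack
  sigaction(SIGALRM, &SA, nullptr);

  unsigned FreqMilliSec = 10;      // same ms -> sec/usec split as adjustTimer
  struct itimerval TV = {};
  TV.it_interval.tv_sec = FreqMilliSec / 1000;
  TV.it_interval.tv_usec = (FreqMilliSec % 1000) * 1000;
  TV.it_value = TV.it_interval;
  setitimer(ITIMER_REAL, &TV, nullptr);

  while (Ticks < 3)                // wait for a few timer signals
    pause();
  return 0;
}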
Index: head/contrib/compiler-rt/lib/profile/InstrProfilingNameVar.c
===================================================================
--- head/contrib/compiler-rt/lib/profile/InstrProfilingNameVar.c (revision 322854)
+++ head/contrib/compiler-rt/lib/profile/InstrProfilingNameVar.c (revision 322855)
@@ -1,18 +1,18 @@
-//===- InstrProfilingNameVar.c - profile name variable setup --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
+/*===- InstrProfilingNameVar.c - profile name variable setup -------------===*\
+|*
+|* The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+\*===----------------------------------------------------------------------===*/
#include "InstrProfiling.h"
/* char __llvm_profile_filename[1]
*
* The runtime should only provide its own definition of this symbol when the
* user has not specified one. Set this up by moving the runtime's copy of this
* symbol to an object file within the archive.
*/
COMPILER_RT_WEAK char INSTR_PROF_PROFILE_NAME_VAR[1] = {0};
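
As the comment above explains, the weak definition only takes effect when nothing else defines the symbol. A hedged sketch of an override, assuming INSTR_PROF_PROFILE_NAME_VAR expands to __llvm_profile_filename as in InstrProfiling.h: a strong definition in user code wins at link time and this archive member is never pulled in (the initializer below is purely illustrative).

// Strong definition supplied by the application; it overrides the weak
// fallback above. "%p" is the process-id token the profile runtime
// understands in filename patterns.
extern "C" char __llvm_profile_filename[] = "my-app-%p.profraw";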
Index: head/contrib/compiler-rt
===================================================================
--- head/contrib/compiler-rt (revision 322854)
+++ head/contrib/compiler-rt (revision 322855)
Property changes on: head/contrib/compiler-rt
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/compiler-rt/dist:r322737-322850
Index: head/contrib/libc++
===================================================================
--- head/contrib/libc++ (revision 322854)
+++ head/contrib/libc++ (revision 322855)
Property changes on: head/contrib/libc++
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/libc++/dist:r322737-322850
Index: head/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- head/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h (revision 322854)
+++ head/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h (revision 322855)
@@ -1,2329 +1,2332 @@
//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the SDNode class and derived classes, which are used to
// represent the nodes and operations present in a SelectionDAG. These nodes
// and operations are machine code level operations, with some similarities to
// the GCC RTL representation.
//
// Clients should include the SelectionDAG.h file instead of this file directly.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
#define LLVM_CODEGEN_SELECTIONDAGNODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
namespace llvm {
class APInt;
class Constant;
template <typename T> struct DenseMapInfo;
class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
class MCSymbol;
class raw_ostream;
class SDNode;
class SelectionDAG;
class Type;
class Value;
void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
bool force = false);
/// This represents a list of ValueType's that has been intern'd by
/// a SelectionDAG. Instances of this simple value class are returned by
/// SelectionDAG::getVTList(...).
///
struct SDVTList {
const EVT *VTs;
unsigned int NumVTs;
};
namespace ISD {
/// Node predicates
/// If N is a BUILD_VECTOR node whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatValue.
- bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
+ /// This sets \p SplatValue to the smallest possible splat unless AllowShrink
+ /// is set to false.
+ bool isConstantSplatVector(const SDNode *N, APInt &SplatValue,
+ bool AllowShrink = true);
/// Return true if the specified node is a BUILD_VECTOR where all of the
/// elements are ~0 or undef.
bool isBuildVectorAllOnes(const SDNode *N);
/// Return true if the specified node is a BUILD_VECTOR where all of the
/// elements are 0 or undef.
bool isBuildVectorAllZeros(const SDNode *N);
/// Return true if the specified node is a BUILD_VECTOR node of all
/// ConstantSDNode or undef.
bool isBuildVectorOfConstantSDNodes(const SDNode *N);
/// Return true if the specified node is a BUILD_VECTOR node of all
/// ConstantFPSDNode or undef.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
/// Return true if the node has at least one operand and all operands of the
/// specified node are ISD::UNDEF.
bool allOperandsUndef(const SDNode *N);
} // end namespace ISD
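  // The new AllowShrink parameter is easy to misread, so a short usage
  // fragment (not standalone; assumes an SDNode *N in the usual
  // DAG-combine context):
  //
  //   APInt SplatValue;
  //   bool IsSplat = ISD::isConstantSplatVector(N, SplatValue);
  //   // With the default AllowShrink = true, SplatValue may come back at
  //   // a narrower bit width than the vector element type when the splat
  //   // fits in fewer bits; pass false to get the full element width.
  //   bool IsSplatFullWidth =
  //       ISD::isConstantSplatVector(N, SplatValue, /*AllowShrink=*/false);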
//===----------------------------------------------------------------------===//
/// Unlike LLVM values, Selection DAG nodes may return multiple
/// values as the result of a computation. Many nodes return multiple values,
/// from loads (which define a token and a return value) to ADDC (which returns
/// a result and a carry value), to calls (which may return an arbitrary number
/// of values).
///
/// As such, each use of a SelectionDAG computation must indicate the node that
/// computes it as well as which return value to use from that node. This pair
/// of information is represented with the SDValue value type.
///
class SDValue {
friend struct DenseMapInfo<SDValue>;
SDNode *Node = nullptr; // The node defining the value we are using.
unsigned ResNo = 0; // Which return value of the node we are using.
public:
SDValue() = default;
SDValue(SDNode *node, unsigned resno);
/// get the index which selects a specific result in the SDNode
unsigned getResNo() const { return ResNo; }
/// get the SDNode which holds the desired result
SDNode *getNode() const { return Node; }
/// set the SDNode
void setNode(SDNode *N) { Node = N; }
inline SDNode *operator->() const { return Node; }
bool operator==(const SDValue &O) const {
return Node == O.Node && ResNo == O.ResNo;
}
bool operator!=(const SDValue &O) const {
return !operator==(O);
}
bool operator<(const SDValue &O) const {
return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
}
explicit operator bool() const {
return Node != nullptr;
}
SDValue getValue(unsigned R) const {
return SDValue(Node, R);
}
/// Return true if this node is an operand of N.
bool isOperandOf(const SDNode *N) const;
/// Return the ValueType of the referenced return value.
inline EVT getValueType() const;
/// Return the simple ValueType of the referenced return value.
MVT getSimpleValueType() const {
return getValueType().getSimpleVT();
}
/// Returns the size of the value in bits.
unsigned getValueSizeInBits() const {
return getValueType().getSizeInBits();
}
unsigned getScalarValueSizeInBits() const {
return getValueType().getScalarType().getSizeInBits();
}
// Forwarding methods - These forward to the corresponding methods in SDNode.
inline unsigned getOpcode() const;
inline unsigned getNumOperands() const;
inline const SDValue &getOperand(unsigned i) const;
inline uint64_t getConstantOperandVal(unsigned i) const;
inline bool isTargetMemoryOpcode() const;
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
inline bool isUndef() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc &getDebugLoc() const;
inline void dump() const;
inline void dumpr() const;
/// Return true if this operand (which must be a chain) reaches the
/// specified operand without crossing any side-effecting instructions.
/// In practice, this looks through token factors and non-volatile loads.
/// In order to remain efficient, this only
/// looks a couple of nodes in; it does not do an exhaustive search.
bool reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth = 2) const;
/// Return true if there are no nodes using value ResNo of Node.
inline bool use_empty() const;
/// Return true if there is exactly one node using value ResNo of Node.
inline bool hasOneUse() const;
};
template<> struct DenseMapInfo<SDValue> {
static inline SDValue getEmptyKey() {
SDValue V;
V.ResNo = -1U;
return V;
}
static inline SDValue getTombstoneKey() {
SDValue V;
V.ResNo = -2U;
return V;
}
static unsigned getHashValue(const SDValue &Val) {
return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
(unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
}
static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
return LHS == RHS;
}
};
template <> struct isPodLike<SDValue> { static const bool value = true; };
/// Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDValue> {
using SimpleType = SDNode *;
static SimpleType getSimplifiedValue(SDValue &Val) {
return Val.getNode();
}
};
template<> struct simplify_type<const SDValue> {
using SimpleType = /*const*/ SDNode *;
static SimpleType getSimplifiedValue(const SDValue &Val) {
return Val.getNode();
}
};
/// Represents a use of a SDNode. This class holds an SDValue,
/// which records the SDNode being used and the result number, a
/// pointer to the SDNode using the value, and Next and Prev pointers,
/// which link together all the uses of an SDNode.
///
class SDUse {
/// Val - The value being used.
SDValue Val;
/// User - The user of this value.
SDNode *User = nullptr;
/// Prev, Next - Pointers to the uses list of the SDNode referred by
/// this operand.
SDUse **Prev = nullptr;
SDUse *Next = nullptr;
public:
SDUse() = default;
SDUse(const SDUse &U) = delete;
SDUse &operator=(const SDUse &) = delete;
/// Normally SDUse will just implicitly convert to an SDValue that it holds.
operator const SDValue&() const { return Val; }
/// If implicit conversion to SDValue doesn't work, the get() method returns
/// the SDValue.
const SDValue &get() const { return Val; }
/// This returns the SDNode that contains this Use.
SDNode *getUser() { return User; }
/// Get the next SDUse in the use list.
SDUse *getNext() const { return Next; }
/// Convenience function for get().getNode().
SDNode *getNode() const { return Val.getNode(); }
/// Convenience function for get().getResNo().
unsigned getResNo() const { return Val.getResNo(); }
/// Convenience function for get().getValueType().
EVT getValueType() const { return Val.getValueType(); }
/// Convenience function for get().operator==
bool operator==(const SDValue &V) const {
return Val == V;
}
/// Convenience function for get().operator!=
bool operator!=(const SDValue &V) const {
return Val != V;
}
/// Convenience function for get().operator<
bool operator<(const SDValue &V) const {
return Val < V;
}
private:
friend class SelectionDAG;
friend class SDNode;
// TODO: unfriend HandleSDNode once we fix its operand handling.
friend class HandleSDNode;
void setUser(SDNode *p) { User = p; }
/// Remove this use from its existing use list, assign it the
/// given value, and add it to the new value's node's use list.
inline void set(const SDValue &V);
/// Like set, but only supports initializing a newly-allocated
/// SDUse with a non-null value.
inline void setInitial(const SDValue &V);
/// Like set, but only sets the Node portion of the value,
/// leaving the ResNo portion unmodified.
inline void setNode(SDNode *N);
void addToList(SDUse **List) {
Next = *List;
if (Next) Next->Prev = &Next;
Prev = List;
*List = this;
}
void removeFromList() {
*Prev = Next;
if (Next) Next->Prev = Prev;
}
};
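
addToList and removeFromList above use the classic pointer-to-pointer trick: Prev stores the address of whichever pointer currently points at this use, so unlinking needs no special case for the list head. A self-contained model of just that technique (names invented for illustration):

#include <cassert>

struct Use {
  Use *Next = nullptr;
  Use **Prev = nullptr;            // address of the link pointing at us

  void addToList(Use **List) {
    Next = *List;
    if (Next) Next->Prev = &Next;
    Prev = List;
    *List = this;
  }
  void removeFromList() {
    *Prev = Next;                  // head and interior unlink identically
    if (Next) Next->Prev = Prev;
  }
};

int main() {
  Use *Head = nullptr;
  Use A, B;
  A.addToList(&Head);              // list: A
  B.addToList(&Head);              // list: B -> A
  B.removeFromList();              // list: A, no head special-casing
  assert(Head == &A && A.Next == nullptr);
  return 0;
}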
/// simplify_type specializations - Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDUse> {
using SimpleType = SDNode *;
static SimpleType getSimplifiedValue(SDUse &Val) {
return Val.getNode();
}
};
/// These are IR-level optimization flags that may be propagated to SDNodes.
/// TODO: This data structure should be shared by the IR optimizer and
/// the backend.
struct SDNodeFlags {
private:
// This bit is used to determine if the flags are in a defined state.
// Flag bits can only be masked out during intersection if the masking flags
// are defined.
bool AnyDefined : 1;
bool NoUnsignedWrap : 1;
bool NoSignedWrap : 1;
bool Exact : 1;
bool UnsafeAlgebra : 1;
bool NoNaNs : 1;
bool NoInfs : 1;
bool NoSignedZeros : 1;
bool AllowReciprocal : 1;
bool VectorReduction : 1;
bool AllowContract : 1;
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
: AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false) {}
/// Sets the state of the flags to the defined state.
void setDefined() { AnyDefined = true; }
/// Returns true if the flags are in a defined state.
bool isDefined() const { return AnyDefined; }
// These are mutators for each flag.
void setNoUnsignedWrap(bool b) {
setDefined();
NoUnsignedWrap = b;
}
void setNoSignedWrap(bool b) {
setDefined();
NoSignedWrap = b;
}
void setExact(bool b) {
setDefined();
Exact = b;
}
void setUnsafeAlgebra(bool b) {
setDefined();
UnsafeAlgebra = b;
}
void setNoNaNs(bool b) {
setDefined();
NoNaNs = b;
}
void setNoInfs(bool b) {
setDefined();
NoInfs = b;
}
void setNoSignedZeros(bool b) {
setDefined();
NoSignedZeros = b;
}
void setAllowReciprocal(bool b) {
setDefined();
AllowReciprocal = b;
}
void setVectorReduction(bool b) {
setDefined();
VectorReduction = b;
}
void setAllowContract(bool b) {
setDefined();
AllowContract = b;
}
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
bool hasNoSignedWrap() const { return NoSignedWrap; }
bool hasExact() const { return Exact; }
bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
bool hasNoNaNs() const { return NoNaNs; }
bool hasNoInfs() const { return NoInfs; }
bool hasNoSignedZeros() const { return NoSignedZeros; }
bool hasAllowReciprocal() const { return AllowReciprocal; }
bool hasVectorReduction() const { return VectorReduction; }
bool hasAllowContract() const { return AllowContract; }
/// Clear any flags in this flag set that aren't also set in Flags.
/// If the given Flags are undefined then don't do anything.
void intersectWith(const SDNodeFlags Flags) {
if (!Flags.isDefined())
return;
NoUnsignedWrap &= Flags.NoUnsignedWrap;
NoSignedWrap &= Flags.NoSignedWrap;
Exact &= Flags.Exact;
UnsafeAlgebra &= Flags.UnsafeAlgebra;
NoNaNs &= Flags.NoNaNs;
NoInfs &= Flags.NoInfs;
NoSignedZeros &= Flags.NoSignedZeros;
AllowReciprocal &= Flags.AllowReciprocal;
VectorReduction &= Flags.VectorReduction;
AllowContract &= Flags.AllowContract;
}
};
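// A short usage sketch of the intersection semantics, against the class
// above (the scenario is invented): flags record guarantees, so a
// combined node may keep only what both inputs guaranteed, and an
// undefined right-hand side means "no information, change nothing".
//
//   SDNodeFlags A, B;
//   A.setNoUnsignedWrap(true);   // A promises no unsigned wrap
//   B.setNoUnsignedWrap(false);  // B is defined but makes no such promise
//   A.intersectWith(B);          // A.hasNoUnsignedWrap() is now false
//
//   SDNodeFlags C;               // default-constructed: not defined
//   A.setExact(true);
//   A.intersectWith(C);          // no effect: C carries no information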
/// Represents one node in the SelectionDAG.
///
class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
private:
/// The operation that this node performs.
int16_t NodeType;
protected:
// We define a set of mini-helper classes to help us interpret the bits in our
// SubclassData. These are designed to fit within a uint16_t so they pack
// with NodeType.
class SDNodeBitfields {
friend class SDNode;
friend class MemIntrinsicSDNode;
friend class MemSDNode;
uint16_t HasDebugValue : 1;
uint16_t IsMemIntrinsic : 1;
};
enum { NumSDNodeBits = 2 };
class ConstantSDNodeBitfields {
friend class ConstantSDNode;
uint16_t : NumSDNodeBits;
uint16_t IsOpaque : 1;
};
class MemSDNodeBitfields {
friend class MemSDNode;
friend class MemIntrinsicSDNode;
friend class AtomicSDNode;
uint16_t : NumSDNodeBits;
uint16_t IsVolatile : 1;
uint16_t IsNonTemporal : 1;
uint16_t IsDereferenceable : 1;
uint16_t IsInvariant : 1;
};
enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
uint16_t : NumMemSDNodeBits;
uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode
};
enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
class LoadSDNodeBitfields {
friend class LoadSDNode;
friend class MaskedLoadSDNode;
uint16_t : NumLSBaseSDNodeBits;
uint16_t ExtTy : 2; // enum ISD::LoadExtType
uint16_t IsExpanding : 1;
};
class StoreSDNodeBitfields {
friend class StoreSDNode;
friend class MaskedStoreSDNode;
uint16_t : NumLSBaseSDNodeBits;
uint16_t IsTruncating : 1;
uint16_t IsCompressing : 1;
};
union {
char RawSDNodeBits[sizeof(uint16_t)];
SDNodeBitfields SDNodeBits;
ConstantSDNodeBitfields ConstantSDNodeBits;
MemSDNodeBitfields MemSDNodeBits;
LSBaseSDNodeBitfields LSBaseSDNodeBits;
LoadSDNodeBitfields LoadSDNodeBits;
StoreSDNodeBitfields StoreSDNodeBits;
};
// RawSDNodeBits must cover the entirety of the union. This means that all of
// the union's members must have size <= RawSDNodeBits. We write the RHS as
// "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
static_assert(sizeof(LoadSDNodeBitfields) <= 4, "field too wide");
static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
private:
friend class SelectionDAG;
// TODO: unfriend HandleSDNode once we fix its operand handling.
friend class HandleSDNode;
/// Unique id per SDNode in the DAG.
int NodeId = -1;
/// The values that are used by this operation.
SDUse *OperandList = nullptr;
/// The types of the values this node defines. SDNode's may
/// define multiple values simultaneously.
const EVT *ValueList;
/// List of uses for this SDNode.
SDUse *UseList = nullptr;
/// The number of entries in the Operand/Value list.
unsigned short NumOperands = 0;
unsigned short NumValues;
// The ordering of the SDNodes. It roughly corresponds to the ordering of the
// original LLVM instructions.
// This is used for turning off scheduling, because we'll forgo
// the normal scheduling algorithms and output the instructions according to
// this ordering.
unsigned IROrder;
/// Source line information.
DebugLoc debugLoc;
/// Return a pointer to the specified value type.
static const EVT *getValueTypeList(EVT VT);
SDNodeFlags Flags;
public:
/// Unique and persistent id per SDNode in the DAG.
/// Used for debug printing.
uint16_t PersistentId;
//===--------------------------------------------------------------------===//
// Accessors
//
/// Return the SelectionDAG opcode value for this node. For
/// pre-isel nodes (those for which isMachineOpcode returns false), these
/// are the opcode values in the ISD and <target>ISD namespaces. For
/// post-isel opcodes, see getMachineOpcode.
unsigned getOpcode() const { return (unsigned short)NodeType; }
/// Test if this node has a target-specific opcode (in the
/// \<target\>ISD namespace).
bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
/// Test if this node has a target-specific
/// memory-referencing opcode (in the \<target\>ISD namespace and
/// greater than FIRST_TARGET_MEMORY_OPCODE).
bool isTargetMemoryOpcode() const {
return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
}
/// Return true if the type of the node is undefined.
bool isUndef() const { return NodeType == ISD::UNDEF; }
/// Test if this node is a memory intrinsic (with valid pointer information).
/// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
/// non-memory intrinsics (with chains) that are not really instances of
/// MemSDNode. For such nodes, we need some extra state to determine the
/// proper classof relationship.
bool isMemIntrinsic() const {
return (NodeType == ISD::INTRINSIC_W_CHAIN ||
NodeType == ISD::INTRINSIC_VOID) &&
SDNodeBits.IsMemIntrinsic;
}
/// Test if this node is a strict floating point pseudo-op.
bool isStrictFPOpcode() {
switch (NodeType) {
default:
return false;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FEXP:
case ISD::STRICT_FEXP2:
case ISD::STRICT_FLOG:
case ISD::STRICT_FLOG10:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FRINT:
case ISD::STRICT_FNEARBYINT:
return true;
}
}
/// Test if this node has a post-isel opcode, directly
/// corresponding to a MachineInstr opcode.
bool isMachineOpcode() const { return NodeType < 0; }
/// This may only be called if isMachineOpcode returns
/// true. It returns the MachineInstr opcode value that the node's opcode
/// corresponds to.
unsigned getMachineOpcode() const {
assert(isMachineOpcode() && "Not a MachineInstr opcode!");
return ~NodeType;
}
bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
/// Return true if there are no uses of this node.
bool use_empty() const { return UseList == nullptr; }
/// Return true if there is exactly one use of this node.
bool hasOneUse() const {
return !use_empty() && std::next(use_begin()) == use_end();
}
/// Return the number of uses of this node. This method takes
/// time proportional to the number of uses.
size_t use_size() const { return std::distance(use_begin(), use_end()); }
/// Return the unique node id.
int getNodeId() const { return NodeId; }
/// Set unique node id.
void setNodeId(int Id) { NodeId = Id; }
/// Return the node ordering.
unsigned getIROrder() const { return IROrder; }
/// Set the node ordering.
void setIROrder(unsigned Order) { IROrder = Order; }
/// Return the source location info.
const DebugLoc &getDebugLoc() const { return debugLoc; }
/// Set source location info. Try to avoid this, putting
/// it in the constructor is preferable.
void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
/// This class provides iterator support for SDUse
/// operands that use a specific SDNode.
class use_iterator
: public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
friend class SDNode;
SDUse *Op = nullptr;
explicit use_iterator(SDUse *op) : Op(op) {}
public:
using reference = std::iterator<std::forward_iterator_tag,
SDUse, ptrdiff_t>::reference;
using pointer = std::iterator<std::forward_iterator_tag,
SDUse, ptrdiff_t>::pointer;
use_iterator() = default;
use_iterator(const use_iterator &I) : Op(I.Op) {}
bool operator==(const use_iterator &x) const {
return Op == x.Op;
}
bool operator!=(const use_iterator &x) const {
return !operator==(x);
}
/// Return true if this iterator is at the end of uses list.
bool atEnd() const { return Op == nullptr; }
// Iterator traversal: forward iteration only.
use_iterator &operator++() { // Preincrement
assert(Op && "Cannot increment end iterator!");
Op = Op->getNext();
return *this;
}
use_iterator operator++(int) { // Postincrement
use_iterator tmp = *this; ++*this; return tmp;
}
/// Retrieve a pointer to the current user node.
SDNode *operator*() const {
assert(Op && "Cannot dereference end iterator!");
return Op->getUser();
}
SDNode *operator->() const { return operator*(); }
SDUse &getUse() const { return *Op; }
/// Retrieve the operand # of this use in its user.
unsigned getOperandNo() const {
assert(Op && "Cannot dereference end iterator!");
return (unsigned)(Op - Op->getUser()->OperandList);
}
};
/// Provide iteration support to walk over all uses of an SDNode.
use_iterator use_begin() const {
return use_iterator(UseList);
}
static use_iterator use_end() { return use_iterator(nullptr); }
inline iterator_range<use_iterator> uses() {
return make_range(use_begin(), use_end());
}
inline iterator_range<use_iterator> uses() const {
return make_range(use_begin(), use_end());
}
/// Return true if there are exactly NUSES uses of the indicated value.
/// This method ignores uses of other values defined by this operation.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
/// Return true if there is any use of the indicated value.
/// This method ignores uses of other values defined by this operation.
bool hasAnyUseOfValue(unsigned Value) const;
/// Return true if this node is the only use of N.
bool isOnlyUserOf(const SDNode *N) const;
/// Return true if this node is an operand of N.
bool isOperandOf(const SDNode *N) const;
/// Return true if this node is a predecessor of N.
/// NOTE: Implemented on top of hasPredecessor and every bit as
/// expensive. Use carefully.
bool isPredecessorOf(const SDNode *N) const {
return N->hasPredecessor(this);
}
/// Return true if N is a predecessor of this node.
/// N is either an operand of this node, or can be reached by recursively
/// traversing up the operands.
/// NOTE: This is an expensive method. Use it carefully.
bool hasPredecessor(const SDNode *N) const;
/// Returns true if N is a predecessor of any node in Worklist. This
/// helper keeps Visited and Worklist sets external so that unioned
/// searches can be performed in parallel, results can be cached across
/// queries, and Worklist can be grown incrementally. Stops early if N
/// is found but can be resumed later. Remember to clear Visited and
/// Worklist if the DAG changes.
static bool hasPredecessorHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode *> &Visited,
SmallVectorImpl<const SDNode *> &Worklist) {
if (Visited.count(N))
return true;
while (!Worklist.empty()) {
const SDNode *M = Worklist.pop_back_val();
bool Found = false;
for (const SDValue &OpV : M->op_values()) {
SDNode *Op = OpV.getNode();
if (Visited.insert(Op).second)
Worklist.push_back(Op);
if (Op == N)
Found = true;
}
if (Found)
return true;
}
return false;
}
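
A self-contained model of the helper above, with a toy graph standing in for SDNode operands; it shows why Visited and Worklist live outside the call: a second query can return immediately from cached traversal work.

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Ops; };

// Mirrors hasPredecessorHelper: expand nodes from Worklist, recording
// everything seen in Visited; finish expanding the current node even
// after finding N so a later, resumed query stays correct.
bool hasPredecessor(const Node *N,
                    std::unordered_set<const Node *> &Visited,
                    std::vector<const Node *> &Worklist) {
  if (Visited.count(N))            // cached hit from an earlier query
    return true;
  while (!Worklist.empty()) {
    const Node *M = Worklist.back();
    Worklist.pop_back();
    bool Found = false;
    for (const Node *Op : M->Ops) {
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
      if (Op == N)
        Found = true;
    }
    if (Found)
      return true;
  }
  return false;
}

int main() {
  Node A, B, C;                    // operand chain: C -> B -> A
  B.Ops = {&A};
  C.Ops = {&B};
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist{&C};
  bool First = hasPredecessor(&A, Visited, Worklist);  // walks the graph
  Worklist = {&C};
  bool Again = hasPredecessor(&A, Visited, Worklist);  // answered from cache
  return First && Again ? 0 : 1;
}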
/// Return true if all the users of N are contained in Nodes.
/// NOTE: Requires at least one match, but doesn't require them all.
static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
/// Return the number of values used by this operation.
unsigned getNumOperands() const { return NumOperands; }
/// Helper method returns the integer value of a ConstantSDNode operand.
inline uint64_t getConstantOperandVal(unsigned Num) const;
const SDValue &getOperand(unsigned Num) const {
assert(Num < NumOperands && "Invalid child # of SDNode!");
return OperandList[Num];
}
using op_iterator = SDUse *;
op_iterator op_begin() const { return OperandList; }
op_iterator op_end() const { return OperandList+NumOperands; }
ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
/// Iterator for directly iterating over the operand SDValue's.
struct value_op_iterator
: iterator_adaptor_base<value_op_iterator, op_iterator,
std::random_access_iterator_tag, SDValue,
ptrdiff_t, value_op_iterator *,
value_op_iterator *> {
explicit value_op_iterator(SDUse *U = nullptr)
: iterator_adaptor_base(U) {}
const SDValue &operator*() const { return I->get(); }
};
iterator_range<value_op_iterator> op_values() const {
return make_range(value_op_iterator(op_begin()),
value_op_iterator(op_end()));
}
SDVTList getVTList() const {
SDVTList X = { ValueList, NumValues };
return X;
}
/// If this node has a glue operand, return the node
/// to which the glue operand points. Otherwise return NULL.
SDNode *getGluedNode() const {
if (getNumOperands() != 0 &&
getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
return getOperand(getNumOperands()-1).getNode();
return nullptr;
}
/// If this node has a glue value with a user, return
/// the user (there is at most one). Otherwise return NULL.
SDNode *getGluedUser() const {
for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
if (UI.getUse().get().getValueType() == MVT::Glue)
return *UI;
return nullptr;
}
const SDNodeFlags getFlags() const { return Flags; }
void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
/// Clear any flags in this node that aren't also set in Flags.
/// If Flags is not in a defined state then this has no effect.
void intersectFlagsWith(const SDNodeFlags Flags);
/// Return the number of values defined/returned by this operator.
unsigned getNumValues() const { return NumValues; }
/// Return the type of a specified result.
EVT getValueType(unsigned ResNo) const {
assert(ResNo < NumValues && "Illegal result number!");
return ValueList[ResNo];
}
/// Return the type of a specified result as a simple type.
MVT getSimpleValueType(unsigned ResNo) const {
return getValueType(ResNo).getSimpleVT();
}
/// Returns MVT::getSizeInBits(getValueType(ResNo)).
unsigned getValueSizeInBits(unsigned ResNo) const {
return getValueType(ResNo).getSizeInBits();
}
using value_iterator = const EVT *;
value_iterator value_begin() const { return ValueList; }
value_iterator value_end() const { return ValueList+NumValues; }
/// Return the opcode of this operation for printing.
std::string getOperationName(const SelectionDAG *G = nullptr) const;
static const char* getIndexedModeName(ISD::MemIndexedMode AM);
void print_types(raw_ostream &OS, const SelectionDAG *G) const;
void print_details(raw_ostream &OS, const SelectionDAG *G) const;
void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
/// Print a SelectionDAG node and all children down to
/// the leaves. The given SelectionDAG allows target-specific nodes
/// to be printed in human-readable form. Unlike printr, this will
/// print the whole DAG, including children that appear multiple
/// times.
///
void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
/// Print a SelectionDAG node and children up to
/// depth "depth." The given SelectionDAG allows target-specific
/// nodes to be printed in human-readable form. Unlike printr, this
/// will print children that appear multiple times wherever they are
/// used.
///
void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
unsigned depth = 100) const;
/// Dump this node, for debugging.
void dump() const;
/// Dump (recursively) this node and its use-def subgraph.
void dumpr() const;
/// Dump this node, for debugging.
/// The given SelectionDAG allows target-specific nodes to be printed
/// in human-readable form.
void dump(const SelectionDAG *G) const;
/// Dump (recursively) this node and its use-def subgraph.
/// The given SelectionDAG allows target-specific nodes to be printed
/// in human-readable form.
void dumpr(const SelectionDAG *G) const;
/// printrFull to dbgs(). The given SelectionDAG allows
/// target-specific nodes to be printed in human-readable form.
/// Unlike dumpr, this will print the whole DAG, including children
/// that appear multiple times.
void dumprFull(const SelectionDAG *G = nullptr) const;
/// printrWithDepth to dbgs(). The given
/// SelectionDAG allows target-specific nodes to be printed in
/// human-readable form. Unlike dumpr, this will print children
/// that appear multiple times wherever they are used.
///
void dumprWithDepth(const SelectionDAG *G = nullptr,
unsigned depth = 100) const;
/// Gather unique data for the node.
void Profile(FoldingSetNodeID &ID) const;
/// This method should only be used by the SDUse class.
void addUse(SDUse &U) { U.addToList(&UseList); }
protected:
static SDVTList getSDVTList(EVT VT) {
SDVTList Ret = { getValueTypeList(VT), 1 };
return Ret;
}
/// Create an SDNode.
///
/// SDNodes are created without any operands, and never own the operand
/// storage. To add operands, see SelectionDAG::createOperands.
SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
: NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
IROrder(Order), debugLoc(std::move(dl)) {
memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
assert(NumValues == VTs.NumVTs &&
"NumValues wasn't wide enough for its operands!");
}
/// Release the operands and set this node to have zero operands.
void DropOperands();
};
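
The SubclassData machinery near the top of this class packs per-subclass flags into one uint16_t: each view starts with an unnamed bitfield that skips the bits its bases own, and all views overlay the same storage through a union. A stripped-down model, leaning on the same layout assumptions the code above makes:

#include <cstdint>
#include <cstring>

class BaseBits {
public:
  uint16_t HasDebugValue : 1;
  uint16_t IsMemIntrinsic : 1;
};
enum { NumBaseBits = 2 };

class MemBits {
public:
  uint16_t : NumBaseBits;          // unnamed field skips the base's bits
  uint16_t IsVolatile : 1;
};

union Packed {                     // every view overlays one uint16_t
  char Raw[sizeof(uint16_t)];
  BaseBits Base;
  MemBits Mem;
};
static_assert(sizeof(Packed) == sizeof(uint16_t), "views must stay packed");

int main() {
  Packed P;
  std::memset(P.Raw, 0, sizeof P.Raw);  // zero everything, as SDNode does
  P.Base.HasDebugValue = 1;             // bit 0
  P.Mem.IsVolatile = 1;                 // bit 2: no overlap with base bits
  return 0;
}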
/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
/// into SDNode creation functions.
/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
/// from the original Instruction, and IROrder is the ordinal position of
/// the instruction.
/// When an SDNode is created after the DAG is being built, both DebugLoc and
/// the IROrder are propagated from the original SDNode.
/// So SDLoc class provides two constructors besides the default one, one to
/// be used by the DAGBuilder, the other to be used by others.
class SDLoc {
private:
DebugLoc DL;
int IROrder = 0;
public:
SDLoc() = default;
SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
SDLoc(const Instruction *I, int Order) : IROrder(Order) {
assert(Order >= 0 && "bad IROrder");
if (I)
DL = I->getDebugLoc();
}
unsigned getIROrder() const { return IROrder; }
const DebugLoc &getDebugLoc() const { return DL; }
};
// Define inline functions from the SDValue class.
inline SDValue::SDValue(SDNode *node, unsigned resno)
: Node(node), ResNo(resno) {
// Explicitly check for !ResNo to avoid use-after-free, because there are
// callers that use SDValue(N, 0) with a deleted N to indicate successful
// combines.
assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
"Invalid result number for the given node!");
assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
}
inline unsigned SDValue::getOpcode() const {
return Node->getOpcode();
}
inline EVT SDValue::getValueType() const {
return Node->getValueType(ResNo);
}
inline unsigned SDValue::getNumOperands() const {
return Node->getNumOperands();
}
inline const SDValue &SDValue::getOperand(unsigned i) const {
return Node->getOperand(i);
}
inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
return Node->getConstantOperandVal(i);
}
inline bool SDValue::isTargetOpcode() const {
return Node->isTargetOpcode();
}
inline bool SDValue::isTargetMemoryOpcode() const {
return Node->isTargetMemoryOpcode();
}
inline bool SDValue::isMachineOpcode() const {
return Node->isMachineOpcode();
}
inline unsigned SDValue::getMachineOpcode() const {
return Node->getMachineOpcode();
}
inline bool SDValue::isUndef() const {
return Node->isUndef();
}
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
inline bool SDValue::hasOneUse() const {
return Node->hasNUsesOfValue(1, ResNo);
}
inline const DebugLoc &SDValue::getDebugLoc() const {
return Node->getDebugLoc();
}
inline void SDValue::dump() const {
return Node->dump();
}
inline void SDValue::dumpr() const {
return Node->dumpr();
}
// Define inline functions from the SDUse class.
inline void SDUse::set(const SDValue &V) {
if (Val.getNode()) removeFromList();
Val = V;
if (V.getNode()) V.getNode()->addUse(*this);
}
inline void SDUse::setInitial(const SDValue &V) {
Val = V;
V.getNode()->addUse(*this);
}
inline void SDUse::setNode(SDNode *N) {
if (Val.getNode()) removeFromList();
Val.setNode(N);
if (N) N->addUse(*this);
}
/// This class is used to form a handle around another node that
/// is persistent and is updated across invocations of replaceAllUsesWith on its
/// operand. This node should be directly created by end-users and not added to
/// the AllNodes list.
class HandleSDNode : public SDNode {
SDUse Op;
public:
explicit HandleSDNode(SDValue X)
: SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
// HandleSDNodes are never inserted into the DAG, so they won't be
// auto-numbered. Use ID 65535 as a sentinel.
PersistentId = 0xffff;
// Manually set up the operand list. This node type is special in that it's
// always stack allocated and SelectionDAG does not manage its operands.
// TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
// be so special.
Op.setUser(this);
Op.setInitial(X);
NumOperands = 1;
OperandList = &Op;
}
~HandleSDNode();
const SDValue &getValue() const { return Op; }
};
class AddrSpaceCastSDNode : public SDNode {
private:
unsigned SrcAddrSpace;
unsigned DestAddrSpace;
public:
AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
unsigned SrcAS, unsigned DestAS);
unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
unsigned getDestAddressSpace() const { return DestAddrSpace; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ADDRSPACECAST;
}
};
/// This is an abstract virtual class for memory operations.
class MemSDNode : public SDNode {
private:
// VT of in-memory value.
EVT MemoryVT;
protected:
/// Memory reference information.
MachineMemOperand *MMO;
public:
MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemoryVT, MachineMemOperand *MMO);
bool readMem() const { return MMO->isLoad(); }
bool writeMem() const { return MMO->isStore(); }
/// Returns alignment and volatility of the memory access
unsigned getOriginalAlignment() const {
return MMO->getBaseAlignment();
}
unsigned getAlignment() const {
return MMO->getAlignment();
}
/// Return the SubclassData value, without HasDebugValue. This contains an
/// encoding of the volatile flag, as well as bits used by subclasses. This
/// function should only be used to compute a FoldingSetNodeID value.
/// The HasDebugValue bit is masked out because CSE map needs to match
/// nodes with debug info with nodes without debug info.
unsigned getRawSubclassData() const {
uint16_t Data;
union {
char RawSDNodeBits[sizeof(uint16_t)];
SDNodeBitfields SDNodeBits;
};
memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
SDNodeBits.HasDebugValue = 0;
memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
return Data;
}
bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
// Returns the offset from the location of the access.
int64_t getSrcValueOffset() const { return MMO->getOffset(); }
/// Returns the AA info that describes the dereference.
AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
/// Returns the Ranges that describes the dereference.
const MDNode *getRanges() const { return MMO->getRanges(); }
/// Returns the synchronization scope ID for this memory operation.
SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
/// Return the atomic ordering requirements for this memory operation. For
/// cmpxchg atomic operations, return the atomic ordering requirements when
/// store occurs.
AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
/// Return the type of the in-memory value.
EVT getMemoryVT() const { return MemoryVT; }
/// Return a MachineMemOperand object describing the memory
/// reference performed by operation.
MachineMemOperand *getMemOperand() const { return MMO; }
const MachinePointerInfo &getPointerInfo() const {
return MMO->getPointerInfo();
}
/// Return the address space for the associated pointer
unsigned getAddressSpace() const {
return getPointerInfo().getAddrSpace();
}
/// Update this MemSDNode's MachineMemOperand information
/// to reflect the alignment of NewMMO, if it has a greater alignment.
/// This must only be used when the new alignment applies to all users of
/// this MachineMemOperand.
void refineAlignment(const MachineMemOperand *NewMMO) {
MMO->refineAlignment(NewMMO);
}
const SDValue &getChain() const { return getOperand(0); }
const SDValue &getBasePtr() const {
return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
}
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
// For some targets, we lower some target intrinsics to a MemIntrinsicNode
// with either an intrinsic or a target opcode.
return N->getOpcode() == ISD::LOAD ||
N->getOpcode() == ISD::STORE ||
N->getOpcode() == ISD::PREFETCH ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
N->getOpcode() == ISD::ATOMIC_SWAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE ||
N->getOpcode() == ISD::MLOAD ||
N->getOpcode() == ISD::MSTORE ||
N->getOpcode() == ISD::MGATHER ||
N->getOpcode() == ISD::MSCATTER ||
N->isMemIntrinsic() ||
N->isTargetMemoryOpcode();
}
};
/// This is an SDNode representing atomic operations.
class AtomicSDNode : public MemSDNode {
public:
AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
EVT MemVT, MachineMemOperand *MMO)
: MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {}
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getVal() const { return getOperand(2); }
/// Returns true if this SDNode represents cmpxchg atomic operation, false
/// otherwise.
bool isCompareAndSwap() const {
unsigned Op = getOpcode();
return Op == ISD::ATOMIC_CMP_SWAP ||
Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
}
/// For cmpxchg atomic operations, return the atomic ordering requirements
/// when store does not occur.
AtomicOrdering getFailureOrdering() const {
assert(isCompareAndSwap() && "Must be cmpxchg operation");
return MMO->getFailureOrdering();
}
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
N->getOpcode() == ISD::ATOMIC_SWAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
};
/// This SDNode is used for target intrinsics that touch
/// memory and need an associated MachineMemOperand. Its opcode may be
/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
class MemIntrinsicSDNode : public MemSDNode {
public:
MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
: MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
SDNodeBits.IsMemIntrinsic = true;
}
// Methods to support isa and dyn_cast
static bool classof(const SDNode *N) {
// We lower some target intrinsics to their target opcode early, so a
// node with a target opcode can be of this class
return N->isMemIntrinsic() ||
N->getOpcode() == ISD::PREFETCH ||
N->isTargetMemoryOpcode();
}
};
/// This SDNode is used to implement the code generator
/// support for the llvm IR shufflevector instruction. It combines elements
/// from two input vectors into a new input vector, with the selection and
/// ordering of elements determined by an array of integers, referred to as
/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
/// An index of -1 is treated as undef, such that the code generator may put
/// any value in the corresponding element of the result.
class ShuffleVectorSDNode : public SDNode {
// The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
// is freed when the SelectionDAG object is destroyed.
const int *Mask;
protected:
friend class SelectionDAG;
ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
: SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
public:
ArrayRef<int> getMask() const {
EVT VT = getValueType(0);
return makeArrayRef(Mask, VT.getVectorNumElements());
}
int getMaskElt(unsigned Idx) const {
assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
return Mask[Idx];
}
bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
int getSplatIndex() const {
assert(isSplat() && "Cannot get splat index for non-splat!");
EVT VT = getValueType(0);
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
if (Mask[i] >= 0)
return Mask[i];
}
llvm_unreachable("Splat with all undef indices?");
}
static bool isSplatMask(const int *Mask, EVT VT);
/// Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void commuteMask(MutableArrayRef<int> Mask) {
unsigned NumElems = Mask.size();
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
if (idx < 0)
continue;
else if (idx < (int)NumElems)
Mask[i] = idx + NumElems;
else
Mask[i] = idx - NumElems;
}
}
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VECTOR_SHUFFLE;
}
};
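
commuteMask above is small but easy to get backwards: after the two inputs swap, a lane that selected from the first vector must now index into the second half, and vice versa. A runnable model:

#include <cassert>
#include <vector>

void commuteMask(std::vector<int> &Mask) {
  int NumElems = static_cast<int>(Mask.size());
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;                    // -1 (undef) stays undef
    Idx = Idx < NumElems ? Idx + NumElems : Idx - NumElems;
  }
}

int main() {
  std::vector<int> Mask = {0, 5, -1, 3}; // 4-wide shuffle of vectors A, B
  commuteMask(Mask);                     // the same shuffle of B, A instead
  assert((Mask == std::vector<int>{4, 1, -1, 7}));
  return 0;
}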
class ConstantSDNode : public SDNode {
friend class SelectionDAG;
const ConstantInt *Value;
ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val,
const DebugLoc &DL, EVT VT)
: SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DL,
getSDVTList(VT)),
Value(val) {
ConstantSDNodeBits.IsOpaque = isOpaque;
}
public:
const ConstantInt *getConstantIntValue() const { return Value; }
const APInt &getAPIntValue() const { return Value->getValue(); }
uint64_t getZExtValue() const { return Value->getZExtValue(); }
int64_t getSExtValue() const { return Value->getSExtValue(); }
bool isOne() const { return Value->isOne(); }
bool isNullValue() const { return Value->isZero(); }
bool isAllOnesValue() const { return Value->isMinusOne(); }
bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Constant ||
N->getOpcode() == ISD::TargetConstant;
}
};
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
}
class ConstantFPSDNode : public SDNode {
friend class SelectionDAG;
const ConstantFP *Value;
ConstantFPSDNode(bool isTarget, const ConstantFP *val, const DebugLoc &DL,
EVT VT)
: SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, DL,
getSDVTList(VT)),
Value(val) {}
public:
const APFloat& getValueAPF() const { return Value->getValueAPF(); }
const ConstantFP *getConstantFPValue() const { return Value; }
/// Return true if the value is positive or negative zero.
bool isZero() const { return Value->isZero(); }
/// Return true if the value is a NaN.
bool isNaN() const { return Value->isNaN(); }
/// Return true if the value is an infinity.
bool isInfinity() const { return Value->isInfinity(); }
/// Return true if the value is negative.
bool isNegative() const { return Value->isNegative(); }
/// We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
/// two floating point values.
/// We leave the version with the double argument here because it's just so
/// convenient to write "2.0" and the like. Without this function we'd
/// have to duplicate its logic everywhere it's called.
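/// A hypothetical use (an illustrative sketch only; Op is an assumed
/// SDValue, not a name from this header):
/// \code
///   if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op.getNode()))
///     if (CFP->isExactlyValue(2.0))
///       ; // Op is bit-for-bit equal to 2.0 in its FP semantics.
/// \endcode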
bool isExactlyValue(double V) const {
bool ignored;
APFloat Tmp(V);
Tmp.convert(Value->getValueAPF().getSemantics(),
APFloat::rmNearestTiesToEven, &ignored);
return isExactlyValue(Tmp);
}
bool isExactlyValue(const APFloat& V) const;
static bool isValueValidForType(EVT VT, const APFloat& Val);
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ConstantFP ||
N->getOpcode() == ISD::TargetConstantFP;
}
};
/// Returns true if \p V is a constant integer zero.
bool isNullConstant(SDValue V);
/// Returns true if \p V is an FP constant with a value of positive zero.
bool isNullFPConstant(SDValue V);
/// Returns true if \p V is an integer constant with all bits set.
bool isAllOnesConstant(SDValue V);
/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
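/// For example (illustrative), (xor X, -1), with the all-ones constant as
/// operand 1, is a bitwise not of X.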
bool isBitwiseNot(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
ConstantSDNode *isConstOrConstSplat(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue V);
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
const GlobalValue *TheGlobal;
int64_t Offset;
unsigned char TargetFlags;
GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
const GlobalValue *GA, EVT VT, int64_t o,
unsigned char TargetFlags);
public:
const GlobalValue *getGlobal() const { return TheGlobal; }
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
// Return the address space this GlobalAddress belongs to.
unsigned getAddressSpace() const;
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::GlobalAddress ||
N->getOpcode() == ISD::TargetGlobalAddress ||
N->getOpcode() == ISD::GlobalTLSAddress ||
N->getOpcode() == ISD::TargetGlobalTLSAddress;
}
};
class FrameIndexSDNode : public SDNode {
friend class SelectionDAG;
int FI;
FrameIndexSDNode(int fi, EVT VT, bool isTarg)
: SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
0, DebugLoc(), getSDVTList(VT)), FI(fi) {
}
public:
int getIndex() const { return FI; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::FrameIndex ||
N->getOpcode() == ISD::TargetFrameIndex;
}
};
class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
int JTI;
unsigned char TargetFlags;
JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)
: SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
}
public:
int getIndex() const { return JTI; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::JumpTable ||
N->getOpcode() == ISD::TargetJumpTable;
}
};
class ConstantPoolSDNode : public SDNode {
friend class SelectionDAG;
union {
const Constant *ConstVal;
MachineConstantPoolValue *MachineCPVal;
} Val;
int Offset; // It's a MachineConstantPoolValue if top bit is set.
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
unsigned char TargetFlags;
ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
TargetFlags(TF) {
assert(Offset >= 0 && "Offset is too large");
Val.ConstVal = c;
}
ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
EVT VT, int o, unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align),
TargetFlags(TF) {
assert(Offset >= 0 && "Offset is too large");
Val.MachineCPVal = v;
Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
}
public:
bool isMachineConstantPoolEntry() const {
return Offset < 0;
}
const Constant *getConstVal() const {
assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
return Val.ConstVal;
}
MachineConstantPoolValue *getMachineCPVal() const {
assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
return Val.MachineCPVal;
}
int getOffset() const {
return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
}
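// Encoding sketch (illustrative, not part of the original header): with a
// 32-bit unsigned, the tag bit is 1 << 31. The MachineConstantPoolValue
// constructor sets it, making Offset negative when read as an int (which is
// exactly what isMachineConstantPoolEntry() tests), and getOffset() masks
// the tag back off.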
// Return the alignment of this constant pool object, which is either 0 (for
// default alignment) or the desired value.
unsigned getAlignment() const { return Alignment; }
unsigned char getTargetFlags() const { return TargetFlags; }
Type *getType() const;
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ConstantPool ||
N->getOpcode() == ISD::TargetConstantPool;
}
};
/// Completely target-dependent object reference.
class TargetIndexSDNode : public SDNode {
friend class SelectionDAG;
unsigned char TargetFlags;
int Index;
int64_t Offset;
public:
TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF)
: SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
TargetFlags(TF), Index(Idx), Offset(Ofs) {}
unsigned char getTargetFlags() const { return TargetFlags; }
int getIndex() const { return Index; }
int64_t getOffset() const { return Offset; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::TargetIndex;
}
};
class BasicBlockSDNode : public SDNode {
friend class SelectionDAG;
MachineBasicBlock *MBB;
/// Debug info is meaningful and potentially useful here, but we create
/// blocks out of order when they're jumped to, which makes it a bit
/// harder. Let's see if we need it first.
explicit BasicBlockSDNode(MachineBasicBlock *mbb)
: SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
{}
public:
MachineBasicBlock *getBasicBlock() const { return MBB; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BasicBlock;
}
};
/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
class BuildVectorSDNode : public SDNode {
public:
// These are constructed as SDNodes and then cast to BuildVectorSDNodes.
explicit BuildVectorSDNode() = delete;
/// Check if this is a constant splat, and if so, find the
/// smallest element size that splats the vector. If MinSplatBits is
/// nonzero, the element size must be at least that large. Note that the
/// splat element may be the entire vector (i.e., a one element vector).
/// Returns the splat element value in SplatValue. Any undefined bits in
/// that value are zero, and the corresponding bits in the SplatUndef mask
/// are set. The SplatBitSize value is set to the splat element size in
/// bits. HasAnyUndefs is set to true if any bits in the vector are
/// undefined. isBigEndian describes the endianness of the target.
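/// For example (illustrative): a v4i32 BUILD_VECTOR of four i32 1 constants
/// is a splat with SplatValue 0x00000001 and SplatBitSize 32, while four
/// i32 0x01010101 constants splat down to SplatBitSize 8 (the byte 0x01).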
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize, bool &HasAnyUndefs,
unsigned MinSplatBits = 0,
bool isBigEndian = false) const;
/// \brief Returns the splatted value or a null value if this is not a splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
/// \brief Returns the splatted constant or null if this is not a constant
/// splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
ConstantSDNode *
getConstantSplatNode(BitVector *UndefElements = nullptr) const;
/// \brief Returns the splatted constant FP or null if this is not a constant
/// FP splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
ConstantFPSDNode *
getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
/// \brief If this is a constant FP splat and the splatted constant FP is an
/// exact power of 2, return the log base 2 integer value. Otherwise,
/// return -1.
///
/// The BitWidth specifies the necessary bit precision.
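/// For example (illustrative): a splat of 8.0 returns 3, while splats of
/// 0.5 or 3.0 return -1, since neither is an exact integral power of 2.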
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
uint32_t BitWidth) const;
bool isConstant() const;
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BUILD_VECTOR;
}
};
/// An SDNode that holds an arbitrary LLVM IR Value. This is
/// used when the SelectionDAG needs to make a simple reference to something
/// in the LLVM IR representation.
///
class SrcValueSDNode : public SDNode {
friend class SelectionDAG;
const Value *V;
/// Create a SrcValue for a general value.
explicit SrcValueSDNode(const Value *v)
: SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
public:
/// Return the contained Value.
const Value *getValue() const { return V; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::SRCVALUE;
}
};
class MDNodeSDNode : public SDNode {
friend class SelectionDAG;
const MDNode *MD;
explicit MDNodeSDNode(const MDNode *md)
: SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
{}
public:
const MDNode *getMD() const { return MD; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MDNODE_SDNODE;
}
};
class RegisterSDNode : public SDNode {
friend class SelectionDAG;
unsigned Reg;
RegisterSDNode(unsigned reg, EVT VT)
: SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
public:
unsigned getReg() const { return Reg; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Register;
}
};
class RegisterMaskSDNode : public SDNode {
friend class SelectionDAG;
// The memory for RegMask is not owned by the node.
const uint32_t *RegMask;
RegisterMaskSDNode(const uint32_t *mask)
: SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
RegMask(mask) {}
public:
const uint32_t *getRegMask() const { return RegMask; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::RegisterMask;
}
};
class BlockAddressSDNode : public SDNode {
friend class SelectionDAG;
const BlockAddress *BA;
int64_t Offset;
unsigned char TargetFlags;
BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
int64_t o, unsigned char Flags)
: SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
BA(ba), Offset(o), TargetFlags(Flags) {}
public:
const BlockAddress *getBlockAddress() const { return BA; }
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BlockAddress ||
N->getOpcode() == ISD::TargetBlockAddress;
}
};
class EHLabelSDNode : public SDNode {
friend class SelectionDAG;
MCSymbol *Label;
EHLabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L)
: SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {}
public:
MCSymbol *getLabel() const { return Label; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::EH_LABEL;
}
};
class ExternalSymbolSDNode : public SDNode {
friend class SelectionDAG;
const char *Symbol;
unsigned char TargetFlags;
ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)
: SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {}
public:
const char *getSymbol() const { return Symbol; }
unsigned char getTargetFlags() const { return TargetFlags; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::ExternalSymbol ||
N->getOpcode() == ISD::TargetExternalSymbol;
}
};
class MCSymbolSDNode : public SDNode {
friend class SelectionDAG;
MCSymbol *Symbol;
MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
: SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
public:
MCSymbol *getMCSymbol() const { return Symbol; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MCSymbol;
}
};
class CondCodeSDNode : public SDNode {
friend class SelectionDAG;
ISD::CondCode Condition;
explicit CondCodeSDNode(ISD::CondCode Cond)
: SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
Condition(Cond) {}
public:
ISD::CondCode get() const { return Condition; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::CONDCODE;
}
};
/// This class is used to represent EVT's, which are used
/// to parameterize some operations.
class VTSDNode : public SDNode {
friend class SelectionDAG;
EVT ValueType;
explicit VTSDNode(EVT VT)
: SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
ValueType(VT) {}
public:
EVT getVT() const { return ValueType; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VALUETYPE;
}
};
/// Base class for LoadSDNode and StoreSDNode
class LSBaseSDNode : public MemSDNode {
public:
LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
LSBaseSDNodeBits.AddressingMode = AM;
assert(getAddressingMode() == AM && "Value truncated");
}
const SDValue &getOffset() const {
return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
}
/// Return the addressing mode for this load or store:
/// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
ISD::MemIndexedMode getAddressingMode() const {
return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
}
/// Return true if this is a pre/post inc/dec load/store.
bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
/// Return true if this is NOT a pre/post inc/dec load/store.
bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD ||
N->getOpcode() == ISD::STORE;
}
};
/// This class is used to represent ISD::LOAD nodes.
class LoadSDNode : public LSBaseSDNode {
friend class SelectionDAG;
LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
MachineMemOperand *MMO)
: LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
assert(readMem() && "Load MachineMemOperand is not a load!");
assert(!writeMem() && "Load MachineMemOperand is a store!");
}
public:
/// Return whether this is a plain (non-extending) load,
/// or one of the varieties of value-extending loads.
ISD::LoadExtType getExtensionType() const {
return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getOffset() const { return getOperand(2); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::LOAD;
}
};
/// This class is used to represent ISD::STORE nodes.
class StoreSDNode : public LSBaseSDNode {
friend class SelectionDAG;
StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
MachineMemOperand *MMO)
: LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
assert(!readMem() && "Store MachineMemOperand is a load!");
assert(writeMem() && "Store MachineMemOperand is not a store!");
}
public:
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
const SDValue &getValue() const { return getOperand(1); }
const SDValue &getBasePtr() const { return getOperand(2); }
const SDValue &getOffset() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::STORE;
}
};
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
friend class SelectionDAG;
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
// In both nodes the address is Op1 and the mask is Op2:
// MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
// MaskedStoreSDNode (Chain, ptr, mask, data)
// Mask is a vector of i1 elements
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getMask() const { return getOperand(2); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD ||
N->getOpcode() == ISD::MSTORE;
}
};
/// This class is used to represent an MLOAD node
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT,
MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) {
LoadSDNodeBits.ExtTy = ETy;
LoadSDNodeBits.IsExpanding = IsExpanding;
}
ISD::LoadExtType getExtensionType() const {
return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
const SDValue &getSrc0() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
}
bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
bool isTrunc, bool isCompressing, EVT MemVT,
MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
StoreSDNodeBits.IsCompressing = isCompressing;
}
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
/// Returns true if the op does a compression to the vector before storing.
/// The node contiguously stores the active elements (integers or floats)
/// in src (those with their respective bit set in writemask k) to unaligned
/// memory at base_addr.
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
const SDValue &getValue() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;
}
};
/// This is a base class used to represent
/// MGATHER and MSCATTER nodes
///
class MaskedGatherScatterSDNode : public MemSDNode {
public:
friend class SelectionDAG;
MaskedGatherScatterSDNode(unsigned NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {}
// In both nodes the mask is Op2; the base and index are Op3 and Op4:
// MaskedGatherSDNode (Chain, src0, mask, base, index), src0 is a passthru value
// MaskedScatterSDNode (Chain, value, mask, base, index)
// Mask is a vector of i1 elements
const SDValue &getBasePtr() const { return getOperand(3); }
const SDValue &getIndex() const { return getOperand(4); }
const SDValue &getMask() const { return getOperand(2); }
const SDValue &getValue() const { return getOperand(1); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER ||
N->getOpcode() == ISD::MSCATTER;
}
};
/// This class is used to represent an MGATHER node
///
class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
public:
friend class SelectionDAG;
MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MGATHER;
}
};
/// This class is used to represent an MSCATTER node
///
class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
public:
friend class SelectionDAG;
MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSCATTER;
}
};
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
class MachineSDNode : public SDNode {
public:
using mmo_iterator = MachineMemOperand **;
private:
friend class SelectionDAG;
MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
: SDNode(Opc, Order, DL, VTs) {}
/// Memory reference descriptions for this instruction.
mmo_iterator MemRefs = nullptr;
mmo_iterator MemRefsEnd = nullptr;
public:
mmo_iterator memoperands_begin() const { return MemRefs; }
mmo_iterator memoperands_end() const { return MemRefsEnd; }
bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
/// Assign this MachineSDNode's memory reference descriptor
/// list. This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
for (mmo_iterator MMI = NewMemRefs, MME = NewMemRefsEnd; MMI != MME; ++MMI)
assert(*MMI && "Null mem ref detected!");
MemRefs = NewMemRefs;
MemRefsEnd = NewMemRefsEnd;
}
static bool classof(const SDNode *N) {
return N->isMachineOpcode();
}
};
class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
SDNode, ptrdiff_t> {
const SDNode *Node;
unsigned Operand;
SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
public:
bool operator==(const SDNodeIterator& x) const {
return Operand == x.Operand;
}
bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
pointer operator*() const {
return Node->getOperand(Operand).getNode();
}
pointer operator->() const { return operator*(); }
SDNodeIterator& operator++() { // Preincrement
++Operand;
return *this;
}
SDNodeIterator operator++(int) { // Postincrement
SDNodeIterator tmp = *this; ++*this; return tmp;
}
size_t operator-(SDNodeIterator Other) const {
assert(Node == Other.Node &&
"Cannot compare iterators of two different nodes!");
return Operand - Other.Operand;
}
static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
static SDNodeIterator end (const SDNode *N) {
return SDNodeIterator(N, N->getNumOperands());
}
unsigned getOperand() const { return Operand; }
const SDNode *getNode() const { return Node; }
};
template <> struct GraphTraits<SDNode*> {
using NodeRef = SDNode *;
using ChildIteratorType = SDNodeIterator;
static NodeRef getEntryNode(SDNode *N) { return N; }
static ChildIteratorType child_begin(NodeRef N) {
return SDNodeIterator::begin(N);
}
static ChildIteratorType child_end(NodeRef N) {
return SDNodeIterator::end(N);
}
};
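// Usage sketch (illustrative; assumes llvm/ADT/GraphTraits.h and
// llvm/ADT/DepthFirstIterator.h are available): with this specialization,
// the generic graph utilities can walk a node's operand graph, e.g.:
//
//   for (SDNode *Op : children<SDNode *>(N))
//     ; // visit each operand node of N
//   for (SDNode *M : depth_first(N))
//     ; // visit N and everything reachable through its operands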
/// A representation of the largest SDNode, for use in sizeof().
///
/// This needs to be a union because the largest node differs on 32 bit systems
/// with 4 and 8 byte pointer alignment, respectively.
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
BlockAddressSDNode,
GlobalAddressSDNode>;
/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
namespace ISD {
/// Returns true if the specified node is a non-extending and unindexed load.
inline bool isNormalLoad(const SDNode *N) {
const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
Ld->getAddressingMode() == ISD::UNINDEXED;
}
/// Returns true if the specified node is a non-extending load.
inline bool isNON_EXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}
/// Returns true if the specified node is an EXTLOAD.
inline bool isEXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
}
/// Returns true if the specified node is a SEXTLOAD.
inline bool isSEXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}
/// Returns true if the specified node is a ZEXTLOAD.
inline bool isZEXTLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
}
/// Returns true if the specified node is an unindexed load.
inline bool isUNINDEXEDLoad(const SDNode *N) {
return isa<LoadSDNode>(N) &&
cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
/// Returns true if the specified node is a non-truncating
/// and unindexed store.
inline bool isNormalStore(const SDNode *N) {
const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
return St && !St->isTruncatingStore() &&
St->getAddressingMode() == ISD::UNINDEXED;
}
/// Returns true if the specified node is a non-truncating store.
inline bool isNON_TRUNCStore(const SDNode *N) {
return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
}
/// Returns true if the specified node is a truncating store.
inline bool isTRUNCStore(const SDNode *N) {
return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
}
/// Returns true if the specified node is an unindexed store.
inline bool isUNINDEXEDStore(const SDNode *N) {
return isa<StoreSDNode>(N) &&
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
} // end namespace ISD
} // end namespace llvm
#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (revision 322854)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (revision 322855)
@@ -1,872 +1,873 @@
//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the DAGTypeLegalizer class. This is a private interface
// shared among the files that implement the SelectionDAG::LegalizeTypes
// method.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and hacks on it until only
/// value types the target machine can handle are left. This involves promoting
/// small sizes to large sizes or splitting up large values into small values.
///
class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
const TargetLowering &TLI;
SelectionDAG &DAG;
public:
/// This pass uses the NodeId on the SDNodes to hold information about the
/// state of each node. This enum lists the possible values.
enum NodeIdFlags {
/// All operands have been processed, so this node is ready to be handled.
ReadyToProcess = 0,
/// This is a new node, not before seen, that was created in the process of
/// legalizing some other node.
NewNode = -1,
/// This node's ID needs to be set to the number of its unprocessed
/// operands.
Unanalyzed = -2,
/// This is a node that has already been processed.
Processed = -3
// 1+ - This is a node which has this many unprocessed operands.
};
private:
/// This is a bitvector that contains two bits for each simple value type,
/// where the two bits correspond to the LegalizeAction enum from
/// TargetLowering. This can be queried with "getTypeAction(VT)".
TargetLowering::ValueTypeActionImpl ValueTypeActions;
/// Return how we should legalize values of this type.
TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
return TLI.getTypeAction(*DAG.getContext(), VT);
}
/// Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
/// Return true if this is a simple legal type.
bool isSimpleLegalType(EVT VT) const {
return VT.isSimple() && TLI.isTypeLegal(VT);
}
/// Return true if this type can be passed in registers.
/// For example, x86_64's f128 should be legal in registers, with only some
/// operations converted to library calls or integer bitwise operations.
bool isLegalInHWReg(EVT VT) const {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
return VT == NVT && isSimpleLegalType(VT);
}
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
/// Pretend all of this node's results are legal.
bool IgnoreNodeResults(SDNode *N) const {
return N->getOpcode() == ISD::TargetConstant;
}
/// For integer nodes that are below legal width, this map indicates what
/// promoted value to use.
SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
/// For integer nodes that need to be expanded this map indicates which
/// operands are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
/// For floating-point nodes converted to integers of the same size, this map
/// indicates the converted value to use.
SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
/// For floating-point nodes that have a smaller precision than the smallest
/// supported precision, this map indicates what promoted value to use.
SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
/// For float nodes that need to be expanded this map indicates which operands
/// are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
/// For nodes that are <1 x ty>, this map indicates the scalar value of type
/// 'ty' to use.
SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
/// For nodes that need to be split this map indicates which operands are the
/// expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
/// For vector nodes that need to be widened, indicates the widened value to
/// use.
SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
/// For values that have been replaced with another, indicates the replacement
/// value to use.
SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
/// This defines a worklist of nodes to process. In order to be pushed onto
/// this worklist, all operands of a node must have already been processed.
SmallVector<SDNode*, 128> Worklist;
public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
ValueTypeActions(TLI.getValueTypeActions()) {
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
}
/// This is the main entry point for the type legalizer. This does a
/// top-down traversal of the dag, legalizing types as it goes. Returns
/// "true" if it made any changes.
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
ExpungeNode(Old);
ExpungeNode(New);
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
}
SelectionDAG &getDAG() const { return DAG; }
private:
SDNode *AnalyzeNewNode(SDNode *N);
void AnalyzeNewValue(SDValue &Val);
void ExpungeNode(SDNode *N);
void PerformExpensiveChecks();
void RemapValue(SDValue &N);
// Common routines.
SDValue BitConvertToInteger(SDValue Op);
SDValue BitConvertVectorToIntegerVector(SDValue Op);
SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
bool CustomWidenLowerNode(SDNode *N, EVT VT);
/// Replace each result of the given MERGE_VALUES node with the corresponding
/// input operand, except for the result 'ResNo', for which the corresponding
/// input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
/// Modify a bit vector to match the SetCC result type of ValVT.
/// The bit vector is widened with zeroes when WithZeroes is true.
SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
void AddToWorklist(SDNode *N) {
N->setNodeId(ReadyToProcess);
Worklist.push_back(N);
}
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed operand Op which was promoted to a larger integer type,
/// this returns the promoted value. The low bits of the promoted value
/// corresponding to the original type are exactly equal to Op.
/// The extra bits contain rubbish, so the promoted value may need to be zero-
/// or sign-extended from the original type before it is usable (the helpers
/// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
/// For example, if Op is an i16 and was promoted to an i32, then this method
/// returns an i32, the lower 16 bits of which coincide with Op, and the upper
/// 16 bits of which contain rubbish.
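/// Illustrative sketch: an i16 holding 0x8000 promoted to i32 may come back
/// as 0x????8000; SExtPromotedInteger then yields 0xFFFF8000, and
/// ZExtPromotedInteger yields 0x00008000.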
SDValue GetPromotedInteger(SDValue Op) {
SDValue &PromotedOp = PromotedIntegers[Op];
RemapValue(PromotedOp);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
void SetPromotedInteger(SDValue Op, SDValue Result);
/// Get a promoted operand and sign extend it to the final size.
SDValue SExtPromotedInteger(SDValue Op) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
Op = GetPromotedInteger(Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
DAG.getValueType(OldVT));
}
/// Get a promoted operand and zero extend it to the final size.
SDValue ZExtPromotedInteger(SDValue Op) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
Op = GetPromotedInteger(Op);
return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_AssertSext(SDNode *N);
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BITREVERSE(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CTLZ(SDNode *N);
SDValue PromoteIntRes_CTPOP(SDNode *N);
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SELECT(SDNode *N);
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
SDValue PromoteIntRes_SHL(SDNode *N);
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
//===--------------------------------------------------------------------===//
// Integer Expansion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed operand Op which was expanded into two integers of half
/// the size, this returns the two halves. The low bits of Op are exactly
/// equal to the bits of Lo; the high bits exactly equal Hi.
/// For example, if Op is an i64 which was expanded into two i32's, then this
/// method returns the two i32's, with Lo being equal to the lower 32 bits of
/// Op, and Hi being equal to the upper 32 bits.
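/// Illustrative sketch: expanding an i64 holding 0x0123456789ABCDEF yields
/// Lo == 0x89ABCDEF and Hi == 0x01234567 as two i32 values.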
void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
// Integer Result Expansion.
void ExpandIntegerResult(SDNode *N, unsigned ResNo);
void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
//===--------------------------------------------------------------------===//
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
/// Given an operand Op of float type, this returns the integer it was
/// converted to when Op is not supported in target HW.
/// The integer contains exactly the same bits as Op - only the type changed.
/// For example, if Op is an f32 which was softened to an i32, then this
/// method returns an i32 whose bits coincide with those of Op.
/// If Op can be efficiently supported in target HW, or the operand must
/// stay in a register, Op is not converted to an integer;
/// in that case, the given Op is returned unchanged.
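/// Illustrative sketch: an f32 holding 1.0f softens to an i32 holding
/// 0x3F800000, the IEEE-754 single-precision bit pattern of 1.0.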
SDValue GetSoftenedFloat(SDValue Op) {
SDValue &SoftenedOp = SoftenedFloats[Op];
if (!SoftenedOp.getNode() &&
isSimpleLegalType(Op.getValueType()))
return Op;
RemapValue(SoftenedOp);
assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
// Convert Float Results to Integer for Non-HW-supported Operations.
bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
SDValue SoftenFloatRes_FEXP2(SDNode *N);
SDValue SoftenFloatRes_FFLOOR(SDNode *N);
SDValue SoftenFloatRes_FLOG(SDNode *N);
SDValue SoftenFloatRes_FLOG2(SDNode *N);
SDValue SoftenFloatRes_FLOG10(SDNode *N);
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
SDValue SoftenFloatRes_FROUND(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
// Return true if we can skip softening the given operand or SDNode because
// either it was softened before by SoftenFloatResult and references to the
// operand were replaced by ReplaceValueWith, or its value type is legal in
// HW registers and the operand can be left unchanged.
bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
// Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FABS(SDNode *N);
SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed operand Op which was expanded into two floating-point
/// values of half the size, this returns the two halves.
/// The low bits of Op are exactly equal to the bits of Lo; the high bits
/// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
/// into two f64's, then this method returns the two f64's, with Lo being
/// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
// Float Result Expansion.
void ExpandFloatResult(SDNode *N, unsigned ResNo);
void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
SDValue ExpandFloatOp_BR_CC(SDNode *N);
SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
SDValue ExpandFloatOp_SETCC(SDNode *N);
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
//===--------------------------------------------------------------------===//
// Float promotion support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
SDValue GetPromotedFloat(SDValue Op) {
SDValue &PromotedOp = PromotedFloats[Op];
RemapValue(PromotedOp);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
void SetPromotedFloat(SDValue Op, SDValue Result);
void PromoteFloatResult(SDNode *N, unsigned ResNo);
SDValue PromoteFloatRes_BITCAST(SDNode *N);
SDValue PromoteFloatRes_BinOp(SDNode *N);
SDValue PromoteFloatRes_ConstantFP(SDNode *N);
SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
SDValue PromoteFloatRes_FMAD(SDNode *N);
SDValue PromoteFloatRes_FPOWI(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned ResNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed one-element vector Op which was scalarized to its
/// element type, this returns the element. For example, if Op is a v1i32,
/// Op = < i32 val >, this method returns val, an i32.
SDValue GetScalarizedVector(SDValue Op) {
SDValue &ScalarizedOp = ScalarizedVectors[Op];
RemapValue(ScalarizedOp);
assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
return ScalarizedOp;
}
void SetScalarizedVector(SDValue Op, SDValue Result);
// Vector Result Scalarization: <1 x ty> -> ty.
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
SDValue ScalarizeVecRes_VSETCC(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+ SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed vector Op which was split into vectors of half the size,
/// this method returns the halves. The first elements of Op coincide with the
/// elements of Lo; the remaining elements of Op coincide with the elements of
/// Hi: Op is what you would get by concatenating Lo and Hi.
/// For example, if Op is a v8i32 that was split into two v4i32's, then this
/// method returns the two v4i32's, with Lo corresponding to the first 4
/// elements of Op, and Hi to the last 4 elements.
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned OpNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Given a processed vector Op which was widened into a larger vector, this
/// method returns the larger vector. The elements of the returned vector
/// consist of the elements of Op followed by elements containing rubbish.
/// For example, if Op is a v2i32 that was widened to a v4i32, then this
/// method returns a v4i32 for which the first two elements are the same as
/// those of Op, while the last two elements contain rubbish.
SDValue GetWidenedVector(SDValue Op) {
SDValue &WidenedOp = WidenedVectors[Op];
RemapValue(WidenedOp);
assert(WidenedOp.getNode() && "Operand wasn't widened?");
return WidenedOp;
}
void SetWidenedVector(SDValue Op, SDValue Result);
// Vector Result Widening.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_VSETCC(SDNode* N);
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTEND(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
/// Helper function to generate a set of loads to load a vector with a
/// resulting wider type. It takes:
/// LdChain: list of chains for the loads to be generated.
/// LD: load to widen.
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD);
/// Helper function to generate a set of extension loads to load a vector with
/// a resulting wider type. It takes:
/// LdChain: list of chains for the loads to be generated.
/// LD: load to widen.
/// ExtType: extension element type.
SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper function to generate a set of stores to store a widened vector into
/// non-widened memory.
/// StChain: list of chains for the stores we have generated.
/// ST: store of a widened value.
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Helper function to generate a set of stores to perform a truncating store
/// of a widened vector into non-widened memory.
/// StChain: list of chains for the stores we have generated.
/// ST: store of a widened value.
void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST);
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// When FillWithZeroes is true, the vector will be widened with zeroes.
/// By default, the vector will be widened with undefined values.
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
/// Return a mask of vector type MaskVT to replace InMask, adjusting it to
/// ToMaskVT with vector extension or truncation as needed.
SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
/// Get the target mask VT, and widen if needed.
EVT getSETCCWidenedResultTy(SDValue SetCC);
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
// Legalization methods which rely only on the fact that the illegal type is
// split into two, not necessarily identical, types. As such they can be used
// for splitting vectors and expanding integers and floats.
void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
if (Op.getValueType().isVector())
GetSplitVector(Op, Lo, Hi);
else if (Op.getValueType().isInteger())
GetExpandedInteger(Op, Lo, Hi);
else
GetExpandedFloat(Op, Lo, Hi);
}
/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
/// given value.
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
// Legalization methods which rely only on the fact that the illegal type is
// split into two identical types of half the size, and that the Lo/Hi part
// is stored first in memory on little/big-endian machines, followed by the
// Hi/Lo part. As such they can be used for expanding integers and floats.
void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
if (Op.getValueType().isInteger())
GetExpandedInteger(Op, Lo, Hi);
else
GetExpandedFloat(Op, Lo, Hi);
}
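// Illustrative note (values assumed for exposition, not part of the original
// header): expanding an illegal i64 holding 0x0123456789ABCDEF into two i32
// halves yields Lo = 0x89ABCDEF and Hi = 0x01234567; per the comment above,
// a little-endian machine stores Lo at the lower address, while a big-endian
// machine stores Hi there instead.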
/// This function will split the integer \p Op into \p NumElements
/// operations of type \p EltVT and store them in \p Ops.
void IntegerToVector(SDValue Op, unsigned NumElements,
SmallVectorImpl<SDValue> &Ops, EVT EltVT);
// Generic Result Expansion.
void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
// Generic Operand Expansion.
SDValue ExpandOp_BITCAST (SDNode *N);
SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
};
} // end namespace llvm.
#endif
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 322854)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (revision 322855)
@@ -1,4086 +1,4119 @@
//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file performs vector type splitting and scalarization for LegalizeTypes.
// Scalarization is the act of changing a computation in an illegal one-element
// vector type to be a computation in its scalar element type. For example,
// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
// as a base case when scalarizing vector arithmetic like <4 x f32>, which
// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
// types.
// Splitting is the act of changing a computation in an invalid vector type to
// be a computation in two vectors of half the size. For example, implementing
// <128 x f32> operations in terms of two <64 x f32> operations.
//
//===----------------------------------------------------------------------===//
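// A minimal sketch of the two transformations described above, using
// hypothetical IR purely for illustration:
//   Scalarize:  %r = fadd <1 x float> %a, %b
//           =>  %r0 = fadd float %a0, %b0        ; re-wrapped as needed
//   Split:      %r = fadd <128 x float> %a, %b
//           =>  %lo = fadd <64 x float> %alo, %blo
//               %hi = fadd <64 x float> %ahi, %bhi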
#include "LegalizeTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
//===----------------------------------------------------------------------===//
// Result Vector Scalarization: <1 x ty> -> ty.
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
report_fatal_error("Do not know how to scalarize the result of this "
"operator!\n");
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
R = ScalarizeVecRes_VecInregOp(N);
break;
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
case ISD::FCOPYSIGN:
case ISD::FDIV:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::FPOW:
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
case ISD::OR:
case ISD::SDIV:
case ISD::SREM:
case ISD::SUB:
case ISD::UDIV:
case ISD::UREM:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
R = ScalarizeVecRes_BinOp(N);
break;
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
}
// If R is null, the sub-method took care of registering the result.
if (R.getNode())
SetScalarizedVector(SDValue(N, ResNo), R);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = GetScalarizedVector(N->getOperand(2));
return DAG.getNode(N->getOpcode(), SDLoc(N),
Op0.getValueType(), Op0, Op1, Op2);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
return GetScalarizedVector(Op);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
return DAG.getNode(ISD::BITCAST, SDLoc(N),
NewVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
EVT EltVT = N->getValueType(0).getVectorElementType();
SDValue InOp = N->getOperand(0);
// The BUILD_VECTOR operands may be of wider element types and
// we may need to truncate them back to the requested return type.
if (EltVT.isInteger())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
return InOp;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
N->getValueType(0).getVectorElementType(),
N->getOperand(0), N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
NewVT, Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
// The value to insert may have a wider type than the vector element type,
// so be sure to truncate it to the element type if necessary.
SDValue Op = N->getOperand(1);
EVT EltVT = N->getValueType(0).getVectorElementType();
if (Op.getValueType() != EltVT)
// FIXME: Can this happen for floating point types?
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
return Op;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
SDValue Result = DAG.getLoad(
ISD::UNINDEXED, N->getExtensionType(),
N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
N->getAAInfo());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
EVT DestVT = N->getValueType(0).getVectorElementType();
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
SDLoc DL(N);
// The result needs scalarizing, but it's not a given that the source does.
// This is a workaround for targets where it's impossible to scalarize the
// result of a conversion, because the source type is legal.
// For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
// are widened to v8i8, v4i16, and v2i32, which are legal, because v1i64 is
// legal and was not scalarized.
// See the similar logic in ScalarizeVecRes_VSETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
EVT VT = OpVT.getVectorElementType();
Op = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
}
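// Worked example for the workaround above (types assumed for illustration):
// scalarizing the v1i1 result of (truncate v1i64) on AArch64 finds that the
// v1i64 source is legal and was never scalarized, so instead of calling
// GetScalarizedVector the code extracts element 0 of the source with
// EXTRACT_VECTOR_ELT and operates on that scalar.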
SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
EVT EltVT = N->getValueType(0).getVectorElementType();
EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
SDValue LHS = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
LHS, DAG.getValueType(ExtVT));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
EVT OpEltVT = OpVT.getVectorElementType();
EVT EltVT = N->getValueType(0).getVectorElementType();
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
Op = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
switch (N->getOpcode()) {
case ISD::ANY_EXTEND_VECTOR_INREG:
return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
case ISD::SIGN_EXTEND_VECTOR_INREG:
return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
}
llvm_unreachable("Illegal extend_vector_inreg opcode");
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
EVT EltVT = N->getValueType(0).getVectorElementType();
SDValue InOp = N->getOperand(0);
if (InOp.getValueType() != EltVT)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
return InOp;
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDValue Cond = N->getOperand(0);
EVT OpVT = Cond.getValueType();
SDLoc DL(N);
// The vselect result and true/false operands need scalarizing, but it's
// not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
// See the similar logic in ScalarizeVecRes_VSETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Cond = GetScalarizedVector(Cond);
} else {
EVT VT = OpVT.getVectorElementType();
Cond = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
SDValue LHS = GetScalarizedVector(N->getOperand(1));
TargetLowering::BooleanContent ScalarBool =
TLI.getBooleanContents(false, false);
TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
// If integer and float booleans have different contents then we can't
// reliably optimize in all cases. There is a full explanation for this in
// DAGCombiner::visitSELECT() where the same issue affects folding
// (select C, 0, 1) to (xor C, 1).
if (TLI.getBooleanContents(false, false) !=
TLI.getBooleanContents(false, true)) {
// At least try the common case where the boolean is generated by a
// comparison.
if (Cond->getOpcode() == ISD::SETCC) {
EVT OpVT = Cond->getOperand(0)->getValueType(0);
ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
VecBool = TLI.getBooleanContents(OpVT);
} else
ScalarBool = TargetLowering::UndefinedBooleanContent;
}
if (ScalarBool != VecBool) {
EVT CondVT = Cond.getValueType();
switch (ScalarBool) {
case TargetLowering::UndefinedBooleanContent:
break;
case TargetLowering::ZeroOrOneBooleanContent:
assert(VecBool == TargetLowering::UndefinedBooleanContent ||
VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
// Vector true is all ones; the scalar consumer expects a single 1, so mask.
Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
Cond, DAG.getConstant(1, SDLoc(N), CondVT));
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
assert(VecBool == TargetLowering::UndefinedBooleanContent ||
VecBool == TargetLowering::ZeroOrOneBooleanContent);
// Vector true is a single 1; the scalar consumer expects all ones, so sign extend.
Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
Cond, DAG.getValueType(MVT::i1));
break;
}
}
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), Cond, LHS,
GetScalarizedVector(N->getOperand(2)));
}
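// Worked example for the boolean-contents fixup above (illustrative): if the
// condition came from a vector compare with ZeroOrNegativeOneBooleanContent
// (true == all ones) but the scalar select expects ZeroOrOneBooleanContent,
// the extracted condition -1 is masked via (AND Cond, 1) so true becomes 1;
// in the opposite direction, a vector 1 is turned into all ones by
// SIGN_EXTEND_INREG from i1.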
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS,
GetScalarizedVector(N->getOperand(2)));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(2));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
N->getOperand(0), N->getOperand(1),
LHS, GetScalarizedVector(N->getOperand(3)),
N->getOperand(4));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() ==
N->getOperand(0).getValueType().isVector() &&
"Scalar/Vector type mismatch");
if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
SDValue LHS = GetScalarizedVector(N->getOperand(0));
SDValue RHS = GetScalarizedVector(N->getOperand(1));
SDLoc DL(N);
// Turn it into a scalar SETCC.
return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
// Figure out if the scalar is the LHS or RHS and return it.
SDValue Arg = N->getOperand(2).getOperand(0);
if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
return GetScalarizedVector(N->getOperand(Op));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT OpVT = LHS.getValueType();
EVT NVT = N->getValueType(0).getVectorElementType();
SDLoc DL(N);
// The result needs scalarizing, but it's not a given that the source does.
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
LHS = GetScalarizedVector(LHS);
RHS = GetScalarizedVector(RHS);
} else {
EVT VT = OpVT.getVectorElementType();
LHS = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
RHS = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
// Turn it into a scalar SETCC.
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
N->getOperand(2));
// Vectors may have different boolean contents than scalars. Promote the
// value appropriately.
ISD::NodeType ExtendCode =
TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
return DAG.getNode(ExtendCode, DL, NVT, Res);
}
//===----------------------------------------------------------------------===//
// Operand Vector Scalarization <1 x ty> -> ty.
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
if (!Res.getNode()) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::TRUNCATE:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
case ISD::EXTRACT_VECTOR_ELT:
Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
break;
case ISD::VSELECT:
Res = ScalarizeVecOp_VSELECT(N);
break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
case ISD::FP_ROUND:
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
break;
}
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
/// If the value to convert is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Elt);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
/// Do the operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexpected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
Ops[i] = GetScalarizedVector(N->getOperand(i));
return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
/// so just return the element, ignoring the index.
SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue Res = GetScalarizedVector(N->getOperand(0));
if (Res.getValueType() != VT)
Res = VT.isFloatingPoint()
? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
: DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
return Res;
}
/// If the input condition is a vector that needs to be scalarized, it must be
/// <1 x i1>, so just convert to a normal ISD::SELECT
/// (still with vector output type since that was acceptable if we got here).
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
EVT VT = N->getValueType(0);
return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
N->getOperand(2));
+}
+
+/// If the operand is a vector that needs to be scalarized then the
+/// result must be v1i1, so just convert to a scalar SETCC and wrap
+/// with a scalar_to_vector since the result type is legal if we got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ EVT NVT = VT.getVectorElementType();
+ SDLoc DL(N);
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+
+ // Vectors may have different boolean contents than scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+ Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
}
/// If the value to store is a vector that needs to be scalarized, it must be
/// <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(N->isUnindexed() && "Indexed store of one-element vector?");
assert(OpNo == 1 && "Do not know how to scalarize this operand!");
SDLoc dl(N);
if (N->isTruncatingStore())
return DAG.getTruncStore(
N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(), N->getAlignment(),
N->getMemOperand()->getFlags(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
N->getAAInfo());
}
/// If the value to round is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
N->getValueType(0).getVectorElementType(), Elt,
N->getOperand(1));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
/// This method is called when the specified result of the specified node is
/// found to need vector splitting. At this point, the node may also have
/// invalid operands or may have other results that need legalization; we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Split node result: ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "SplitVectorResult #" << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
report_fatal_error("Do not know how to split the result of this "
"operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
case ISD::MLOAD:
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
break;
case ISD::MGATHER:
SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
break;
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
break;
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
SplitVecRes_ExtendOp(N, Lo, Hi);
break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SDIV:
case ISD::UDIV:
case ISD::FDIV:
case ISD::FPOW:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::UREM:
case ISD::SREM:
case ISD::FREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
if (Lo.getNode())
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Op0Lo, Op0Hi;
GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
SDValue Op1Lo, Op1Hi;
GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
SDValue Op2Lo, Op2Hi;
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
Op0Lo, Op1Lo, Op2Lo);
Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
Op0Hi, Op1Hi, Op2Hi);
}
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SDLoc dl(N);
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeWidenVector:
break;
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
// A scalar to vector conversion, where the scalar needs expansion.
// If the vector is being split in two then we can just convert the
// expanded pieces.
if (LoVT == HiVT) {
GetExpandedOp(InOp, Lo, Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
break;
case TargetLowering::TypeSplitVector:
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
// In the general case, convert the input to an integer and split it by hand.
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
if (DAG.getDataLayout().isBigEndian())
std::swap(LoIntVT, HiIntVT);
SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
Lo = DAG.getBuildVector(LoVT, dl, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
Hi = DAG.getBuildVector(HiVT, dl, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
SDLoc dl(N);
unsigned NumSubvectors = N->getNumOperands() / 2;
if (NumSubvectors == 1) {
Lo = N->getOperand(0);
Hi = N->getOperand(1);
return;
}
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
}
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
SDLoc dl(N);
GetSplitVector(Vec, Lo, Hi);
EVT VecVT = Vec.getValueType();
unsigned VecElems = VecVT.getVectorNumElements();
unsigned SubElems = SubVec.getValueType().getVectorNumElements();
// If we know the index is 0, and we know the subvector doesn't cross the
// boundary between the halves, we can avoid spilling the vector, and insert
// into the lower half of the split vector directly.
// TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
// the index is constant and there is no boundary crossing. But those cases
// don't seem to get hit in practice.
if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = ConstIdx->getZExtValue();
if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
return;
}
}
// Spill the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new subvector into the specified index.
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
// Load the Lo part from the stack slot.
Lo =
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr =
DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
MinAlign(Alignment, IncrementSize));
}
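// Illustrative summary of the spill path above (types assumed): for
// (insert_subvector v8i32, v2i32, idx) with a non-zero or boundary-crossing
// idx, the v8i32 is stored to a stack temporary, the v2i32 is stored over it
// at the element offset for idx, and the two v4i32 halves are then re-loaded
// from the slot as Lo and Hi.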
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
}
void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc DL(N);
SDValue RHSLo, RHSHi;
SDValue RHS = N->getOperand(1);
EVT RHSVT = RHS.getValueType();
if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
GetSplitVector(RHS, RHSLo, RHSHi);
else
std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
}
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc dl(N);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) =
DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
DAG.getValueType(LoVT));
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
DAG.getValueType(HiVT));
}
void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDLoc dl(N);
SDValue InLo, InHi;
if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(N0, InLo, InHi);
else
std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
EVT InLoVT = InLo.getValueType();
unsigned InNumElements = InLoVT.getVectorNumElements();
EVT OutLoVT, OutHiVT;
std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned OutNumElements = OutLoVT.getVectorNumElements();
assert((2 * OutNumElements) <= InNumElements &&
"Illegal extend vector in reg split");
// *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
// input vector (i.e. we only use InLo):
// OutLo will extend the first OutNumElements from InLo.
// OutHi will extend the next OutNumElements from InLo.
// Shuffle the elements from InLo for OutHi into the bottom elements to
// create a 'fake' InHi.
SmallVector<int, 8> SplitHi(InNumElements, -1);
for (unsigned i = 0; i != OutNumElements; ++i)
SplitHi[i] = i + OutNumElements;
InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
}
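// Worked example for the 'fake InHi' shuffle above (types assumed for
// illustration): splitting (zero_extend_vector_inreg v16i8) with a v8i16
// result into two v4i16 halves splits the input into InLo = v8i8, so
// InNumElements = 8 and OutNumElements = 4. SplitHi then becomes
// {4, 5, 6, 7, -1, -1, -1, -1}, moving bytes 4..7 of InLo into the bottom
// lanes so the second extend sees them as its lowest elements.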
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Elt = N->getOperand(1);
SDValue Idx = N->getOperand(2);
SDLoc dl(N);
GetSplitVector(Vec, Lo, Hi);
if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = CIdx->getZExtValue();
unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
if (IdxVal < LoNumElts)
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
else
Hi =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
DAG.getConstant(IdxVal - LoNumElts, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
return;
}
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(0), true))
return;
// Spill the vector to the stack.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store =
DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
// Load the Lo part from the stack slot.
Lo =
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
Hi = DAG.getUNDEF(HiVT);
}
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(LD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Ch = LD->getChain();
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
Alignment, MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(LD, 1), Ch);
}
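// Illustrative note on the chain handling above: the TokenFactor joins the
// chains of the Lo and Hi loads, so anything that depended on the original
// load's chain result now waits for both half-loads; ReplaceValueWith then
// redirects those users to the new combined chain.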
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(MLD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
SDValue Src0 = MLD->getSrc0();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
// If Alignment is equal to the vector size, take half of it for the
// second part.
unsigned SecondHalfAlignment =
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
// Split Mask operand
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Src0, Src0Lo, Src0Hi);
else
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
ExtType, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MLD, 1), Ch);
}
void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(MGT);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
SDValue Index = MGT->getIndex();
unsigned Alignment = MGT->getOriginalAlignment();
// Split Mask operand
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
// Split MemoryVT
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Src0, Src0Lo, Src0Hi);
else
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Index, IndexLo, IndexHi);
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
}
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
EVT LoVT, HiVT;
SDLoc DL(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the input.
SDValue LL, LH, RL, RH;
std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
}
void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// Get the dest types - they may not match the input types, e.g. int_to_fp.
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
GetSplitVector(N->getOperand(0), Lo, Hi);
else
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
}
}
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT SrcVT = N->getOperand(0).getValueType();
EVT DestVT = N->getValueType(0);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
// We can do better than a generic split operation if the extend is doing
// more than just doubling the width of the elements and the following are
// true:
// - The number of vector elements is even,
// - the source type is legal,
// - the type of a split source is illegal,
// - the type of an extended (by doubling element size) source is legal, and
// - the type of that extended source when split is legal.
//
// This won't necessarily completely legalize the operation, but it will
// more effectively move in the right direction and prevent falling down
// to scalarization in many cases due to the input vector being split too
// far.
unsigned NumElements = SrcVT.getVectorNumElements();
if ((NumElements & 1) == 0 &&
SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
EVT SplitLoVT, SplitHiVT;
std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
DEBUG(dbgs() << "Split vector extend via incremental extend:";
N->dump(&DAG); dbgs() << "\n");
// Extend the source vector by one step.
SDValue NewSrc =
DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
// Get the low and high halves of the new, extended one step, vector.
std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
// Extend those vector halves the rest of the way.
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
return;
}
}
// Fall back to the generic unary operator splitting otherwise.
SplitVecRes_UnaryOp(N, Lo, Hi);
}
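// Worked example for the incremental extend above (target legality assumed
// for illustration, e.g. an AVX2-like target): splitting
// (sign_extend v16i8 to v16i32) where v16i8 and v16i16 are legal but v8i8 is
// not first extends one step to v16i16, splits that into two v8i16 halves,
// and then sign extends each half to v8i32 -- rather than splitting the
// legal v16i8 source directly and falling toward scalarization.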
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue &Lo, SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
SDValue Inputs[4];
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
EVT NewVT = Inputs[0].getValueType();
unsigned NewElts = NewVT.getVectorNumElements();
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
SmallVector<int, 16> Ops;
for (unsigned High = 0; High < 2; ++High) {
SDValue &Output = High ? Hi : Lo;
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands (recorded in InputUsed).
// If building a suitable shuffle vector proves too hard, then bail
// out with useBuildVector set.
unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
unsigned FirstMaskIdx = High * NewElts;
bool useBuildVector = false;
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
// The mask element. This indexes into the input.
int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
// The input vector this mask element indexes into.
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element does not index into any input vector.
Ops.push_back(-1);
continue;
}
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NewElts;
// Find or create a shuffle vector operand to hold this input.
unsigned OpNo;
for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
if (InputUsed[OpNo] == Input) {
// This input vector is already an operand.
break;
} else if (InputUsed[OpNo] == -1U) {
// Create a new operand for this input vector.
InputUsed[OpNo] = Input;
break;
}
}
if (OpNo >= array_lengthof(InputUsed)) {
// More than two input vectors used! Give up on trying to create a
// shuffle vector. Insert all elements into a BUILD_VECTOR instead.
useBuildVector = true;
break;
}
// Add the mask index for the new shuffle vector.
Ops.push_back(Idx + OpNo * NewElts);
}
if (useBuildVector) {
EVT EltVT = NewVT.getVectorElementType();
SmallVector<SDValue, 16> SVOps;
// Extract the input elements by hand.
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
// The mask element. This indexes into the input.
int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
// The input vector this mask element indexes into.
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element is "undef" or indexes off the end of the input.
SVOps.push_back(DAG.getUNDEF(EltVT));
continue;
}
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NewElts;
// Extract the vector element by hand.
SVOps.push_back(DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
Output = DAG.getBuildVector(NewVT, dl, SVOps);
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
} else {
SDValue Op0 = Inputs[InputUsed[0]];
// If only one input was used, use an undefined vector for the other.
SDValue Op1 = InputUsed[1] == -1U ?
DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
// At least one input vector was used. Create a new shuffle vector.
Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
}
Ops.clear();
}
}
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
/// This method is called when the specified operand of the specified node is
/// found to need vector splitting. At this point, all of the result types of
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Split node operand: ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom split this node.
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
if (!Res.getNode()) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
report_fatal_error("Do not know how to split this operator's "
"operand!\n");
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
case ISD::MSTORE:
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
break;
case ISD::MSCATTER:
Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
break;
case ISD::MGATHER:
Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
break;
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
case ISD::FCANONICALIZE:
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = SplitVecOp_ExtVecInRegOp(N);
break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
Res = SplitVecOp_VECREDUCE(N, OpNo);
break;
}
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
// The only possibility for an illegal operand is the mask, since result type
// legalization would have handled this node already otherwise.
assert(OpNo == 0 && "Illegal operand must be mask");
SDValue Mask = N->getOperand(0);
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
EVT Src0VT = Src0.getValueType();
SDLoc DL(N);
assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
assert(Lo.getValueType() == Hi.getValueType() &&
"Lo and Hi have differing types");
EVT LoOpVT, HiOpVT;
std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
SDValue LoSelect =
DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
SDValue HiSelect =
DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
}
SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
SDValue VecOp = N->getOperand(OpNo);
EVT VecVT = VecOp.getValueType();
assert(VecVT.isVector() && "Can only split reduce vector operand");
GetSplitVector(VecOp, Lo, Hi);
EVT LoOpVT, HiOpVT;
std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
bool NoNaN = N->getFlags().hasNoNaNs();
unsigned CombineOpc = 0;
switch (N->getOpcode()) {
case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
case ISD::VECREDUCE_FMAX:
CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
break;
case ISD::VECREDUCE_FMIN:
CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
break;
default:
llvm_unreachable("Unexpected reduce ISD node");
}
// Combine the split subvectors element-wise with the matching vector binary
// op, then reduce the now partially reduced, half-length vector.
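// Illustrative trace (hypothetical types; v8i32 split into two v4i32 halves):
//   t0: i32 = vecreduce_add t1 (v8i32)
// becomes
//   t2: v4i32 = add Lo, Hi
//   t3: i32 = vecreduce_add t2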
SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
}
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
// For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
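// Illustrative trace (hypothetical types): for i64 = bitcast v4i16, with
// v4i16 split into two v2i16 halves:
//   LoI: i32 = bitcast Lo;  HiI: i32 = bitcast Hi
//   res: i64 = JoinIntegers(LoI, HiI)  // halves swapped first on big-endian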
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
uint64_t LoElts = Lo.getValueType().getVectorNumElements();
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal < LoElts) {
assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
"Extracted subvector crosses vector split!");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
DAG.getConstant(IdxVal - LoElts, dl,
Idx.getValueType()));
}
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
EVT VecVT = Vec.getValueType();
if (isa<ConstantSDNode>(Idx)) {
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
uint64_t LoElts = Lo.getValueType().getVectorNumElements();
if (IdxVal < LoElts)
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts, SDLoc(N),
Idx.getValueType())), 0);
}
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(0), true))
return SDValue();
// Make the vector elements byte-addressable if they aren't already.
SDLoc dl(N);
EVT EltVT = VecVT.getVectorElementType();
if (EltVT.getSizeInBits() < 8) {
SmallVector<SDValue, 4> ElementOps;
for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
ElementOps.push_back(DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
DAG.getConstant(i, dl, MVT::i8)),
dl, MVT::i8));
}
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
}
// Store the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
SDValue Store =
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo(), EltVT);
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
SDValue Lo, Hi;
// *_EXTEND_VECTOR_INREG nodes only reference the lower half of the input, so
// splitting the result has the same effect as splitting the input operand.
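// Illustrative trace (hypothetical types):
//   t0: v4i32 = zero_extend_vector_inreg t1 (v16i8)
// reads only bytes 0..3 of t1, so the two v2i32 halves produced by
// SplitVecRes_ExtVecInRegOp are both derived from lo(t1) and can simply
// be concatenated back into the legal v4i32 result.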
SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
unsigned OpNo) {
EVT LoVT, HiVT;
SDLoc dl(MGT);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
// Split Mask operand
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue Src0Lo, Src0Hi;
if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Src0, Src0Lo, Src0Hi);
else
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Index, IndexLo, IndexHi);
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(),
MGT->getRanges());
SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(MGT, 1), Ch);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
Hi);
ReplaceValueWith(SDValue(MGT, 0), Res);
return SDValue();
}
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
GetSplitVector(Data, DataLo, DataHi);
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
// Split Mask operand
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
// If the alignment equals the size of the whole vector in bytes, only half
// of that alignment can be assumed for the second half of the store.
unsigned SecondHalfAlignment =
(Alignment == Data->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;
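// Illustrative example (hypothetical types): storing a v16i32 (64 bytes)
// with 64-byte alignment leaves only 32-byte alignment for the second
// half; with 16-byte alignment, both halves keep the 16-byte alignment.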
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
N->isTruncatingStore(),
N->isCompressingStore());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore(), N->isCompressingStore());
// Build a factor node to remember that this store is independent of the
// other one.
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Index = N->getIndex();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
// Split all operands
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
GetSplitVector(Data, DataLo, DataHi);
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
// Split Mask operand
GetSplitVector(Mask, MaskLo, MaskHi);
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(Index, IndexLo, IndexHi);
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
// Build a factor node to remember that this store is independent of the
// other one.
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
SDLoc DL(N);
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
Alignment, MMOFlags, AAInfo);
else
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, Alignment, MMOFlags, AAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
Alignment, MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
SDLoc DL(N);
// The input operands all must have the same type, and we know the result
// type is legal. Convert this to a BUILD_VECTOR which extracts all the
// input elements.
// TODO: If the input elements are power-two vectors, we could convert this to
// a new CONCAT_VECTORS node with elements that are half-wide.
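// Illustrative trace (hypothetical types, with v4i64 assumed illegal):
//   t0: v8i64 = concat_vectors t1, t2 (each v4i64)
// becomes
//   t3: v8i64 = BUILD_VECTOR t1[0], t1[1], t1[2], t1[3],
//                            t2[0], t2[1], t2[2], t2[3]
// where each element is an EXTRACT_VECTOR_ELT of the corresponding input.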
SmallVector<SDValue, 32> Elts;
EVT EltVT = N->getValueType(0).getVectorElementType();
for (const SDValue &Op : N->op_values()) {
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
Elts.push_back(DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
}
return DAG.getBuildVector(N->getValueType(0), DL, Elts);
}
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// The result type is legal, but the input type is illegal. If splitting
// ends up with the result type of each half still being legal, just
// do that. If, however, that would result in an illegal result type,
// we can try to get more clever with power-two vectors. Specifically,
// split the input type, but also widen the result element size, then
// concatenate the halves and truncate again. For example, consider a target
// where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
// vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
// %inlo = v4i32 extract_subvector %in, 0
// %inhi = v4i32 extract_subvector %in, 4
// %lo16 = v4i16 trunc v4i32 %inlo
// %hi16 = v4i16 trunc v4i32 %inhi
// %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
// %res = v8i8 trunc v8i16 %in16
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
SDValue InVec = N->getOperand(0);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
bool IsFloat = OutVT.isFloatingPoint();
// Widening should have already made sure this is a power-two vector
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
// If the input elements are no more than twice the width of the result
// elements, just use the normal splitting. Our trick only works if there's
// room to split more than once.
if (InElementSize <= OutElementSize * 2)
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);
// Extract the halves of the input via extract_subvector.
SDValue InLoVec, InHiVec;
std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
HalfHi);
// Now finish up by truncating all the way down to the original result
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
0, DL, TLI.getPointerTy(DAG.getDataLayout())))
: DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
}
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
// The result has a legal vector type, but the input needs splitting.
SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo0, Hi0);
GetSplitVector(N->getOperand(1), Lo1, Hi1);
unsigned PartElements = Lo0.getValueType().getVectorNumElements();
EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
return PromoteTargetBoolean(Con, N->getValueType(0));
}
SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
// The result (and the first input) has a legal vector type, but the second
// input needs splitting.
return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
}
//===----------------------------------------------------------------------===//
// Result Vector Widening
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
return;
SDValue Res = SDValue();
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "WidenVectorResult #" << ResNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE:
Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
break;
case ISD::MLOAD:
Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
case ISD::MGATHER:
Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
case ISD::ADD:
case ISD::AND:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::OR:
case ISD::SUB:
case ISD::XOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNAN:
case ISD::FMAXNAN:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
Res = WidenVecRes_Binary(N);
break;
case ISD::FADD:
case ISD::FMUL:
case ISD::FPOW:
case ISD::FSUB:
case ISD::FDIV:
case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
Res = WidenVecRes_BinaryCanTrap(N);
break;
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
Res = WidenVecRes_Shift(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
break;
case ISD::ANY_EXTEND:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
Res = WidenVecRes_Convert(N);
break;
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
Res = WidenVecRes_Ternary(N);
break;
}
// If Res is null, the sub-method took care of registering the result.
if (Res.getNode())
SetWidenedVector(SDValue(N, ResNo), Res);
}
SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
// Ternary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
SDValue InOp3 = GetWidenedVector(N->getOperand(2));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Binary op widening for operations that can trap.
unsigned Opcode = N->getOpcode();
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
// Since the operation can trap, apply operation on the original vector.
EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
SmallVector<SDValue, 16> ConcatOps(CurNumElts);
unsigned ConcatEnd = 0; // Current ConcatOps index.
int Idx = 0; // Current Idx into input vectors.
// NumElts := greatest legal vector size (at most WidenVT)
// while (orig. vector has unhandled elements) {
// take munches of size NumElts from the beginning and add to ConcatOps
// NumElts := next smaller supported vector size or 1
// }
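// Illustrative trace (hypothetical types; assume v4i32 and v2i32 are legal,
// v8i32 is not, and SDIV can trap): widening t0: v7i32 = sdiv t1, t2 must
// not operate on all of v8i32, since the padded lane could fault (e.g.
// divide by zero). The seven original lanes are covered exactly:
//   ConcatOps[0]: v4i32 = sdiv lanes 0..3
//   ConcatOps[1]: v2i32 = sdiv lanes 4..5
//   ConcatOps[2]: i32   = sdiv lane 6
// The pieces are then rebuilt (scalar -> v2i32 -> v4i32) and concatenated,
// with undef padding, into the widened v8i32 result.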
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
}
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
} while (!TLI.isTypeLegal(VT) && NumElts != 1);
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue EOp2 = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2, Flags);
}
CurNumElts = 0;
}
}
// Check to see if we have a single operation with the widen type.
if (ConcatEnd == 1) {
VT = ConcatOps[0].getValueType();
if (VT == WidenVT)
return ConcatOps[0];
}
// while (Some element of ConcatOps is not of type MaxVT) {
// From the end of ConcatOps, collect elements of the same type and put
// them into an op of the next larger supported type
// }
while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
Idx--;
int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
EVT NextVT;
do {
NextSize *= 2;
NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
} while (!TLI.isTypeLegal(NextVT));
if (!VT.isVector()) {
// Scalar type: collect the scalars into a vector of type NextVT using
// INSERT_VECTOR_ELT nodes.
SDValue VecOp = DAG.getUNDEF(NextVT);
unsigned NumToInsert = ConcatEnd - Idx - 1;
for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
VecOp = DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
} else {
// Vector type, create a CONCAT_VECTORS of type NextVT
SDValue undefVec = DAG.getUNDEF(VT);
unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
unsigned RealVals = ConcatEnd - Idx - 1;
unsigned SubConcatEnd = 0;
unsigned SubConcatIdx = Idx + 1;
while (SubConcatEnd < RealVals)
SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
while (SubConcatEnd < OpsToConcat)
SubConcatOps[SubConcatEnd++] = undefVec;
ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
NextVT, SubConcatOps);
ConcatEnd = SubConcatIdx + 1;
}
}
// Check to see if we have a single operation with the widen type.
if (ConcatEnd == 1) {
VT = ConcatOps[0].getValueType();
if (VT == WidenVT)
return ConcatOps[0];
}
// Add undefs of size MaxVT until ConcatOps reaches the length of WidenVT.
unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd) {
SDValue UndefVal = DAG.getUNDEF(MaxVT);
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
makeArrayRef(ConcatOps.data(), NumOps));
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
const SDNodeFlags Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
if (InVTNumElts == WidenNumElts) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
// If both input and result vector types are of the same width, extend
// operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
// accepts fewer elements in the result than in the input.
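// Illustrative trace (hypothetical SSE-like target): for
//   t0: v2i32 = sign_extend t1 (v2i8)
// the result widens to v4i32 and the input widens to v16i8, both 128 bits
// wide, so this becomes
//   t2: v4i32 = sign_extend_vector_inreg t1' (v16i8)
// which extends the low four bytes; only the first two lanes carry real
// data, which is fine for a widened result.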
if (Opcode == ISD::SIGN_EXTEND)
return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
if (Opcode == ISD::ZERO_EXTEND)
return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
}
}
if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
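// Illustrative trace (hypothetical AVX-like target where v4f64 is legal):
//   t0: v2i32 = fp_to_sint t1 (v2f64)
// has WidenVT = v4i32 and InWidenVT = v4f64, so the input is padded
//   t2: v4f64 = concat_vectors t1, undef
// and the conversion runs once on the widened vector:
//   t3: v4i32 = fp_to_sint t2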
if (WidenNumElts % InVTNumElts == 0) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat = WidenNumElts/InVTNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
Ops[0] = InOp;
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
if (InVTNumElts % WidenNumElts == 0) {
SDValue InVal = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
// Extract a subvector and convert that shortened input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
// Otherwise unroll into some nasty scalar code and rebuild the vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
for (i=0; i < MinElts; ++i) {
SDValue Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenSVT = WidenVT.getVectorElementType();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
EVT InSVT = InVT.getVectorElementType();
unsigned InVTNumElts = InVT.getVectorNumElements();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
case ISD::SIGN_EXTEND_VECTOR_INREG:
return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
case ISD::ZERO_EXTEND_VECTOR_INREG:
return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
}
}
}
// Unroll, extend the scalars and rebuild the vector.
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
break;
case ISD::SIGN_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
break;
case ISD::ZERO_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
break;
default:
llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
}
Ops.push_back(Val);
}
while (Ops.size() != WidenNumElts)
Ops.push_back(DAG.getUNDEF(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
// If this is an FCOPYSIGN with same input types, we can treat it as a
// normal (can trap) binary op.
if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
return WidenVecRes_BinaryCanTrap(N);
// If the types are different, fall back to unrolling.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue ShOp = N->getOperand(1);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue ShOp = N->getOperand(1);
EVT ShVT = ShOp.getValueType();
if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
ShVT.getVectorElementType(),
WidenVT.getVectorNumElements());
if (ShVT != ShWidenVT)
ShOp = ModifyToType(ShOp, ShWidenVT);
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
cast<VTSDNode>(N->getOperand(1))->getVT()
.getVectorElementType(),
WidenVT.getVectorNumElements());
SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N),
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
return GetWidenedVector(WidenVec);
}
SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT VT = N->getValueType(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDLoc dl(N);
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
// If the incoming type is a vector that is being promoted, then
// we know that the elements are arranged differently and that we
// must perform the conversion using a stack slot.
if (InVT.isVector())
break;
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeSplitVector:
break;
case TargetLowering::TypeWidenVector:
// If the InOp is widened to the same size, convert it. Otherwise, fall
// out of the switch and widen the widened input.
InOp = GetWidenedVector(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
// The input widens to the same size. Convert to the widened value.
return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
}
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
// Determine the new input vector type. The new input vector type will use
// the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
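// Illustrative trace (hypothetical SSE-like target): widening
//   t0: v2i16 = bitcast t1 (i32)
// to WidenVT = v8i16 (128 bits) gives NewInVT = v4i32 and NewNumElts = 4:
//   t2: v4i32 = BUILD_VECTOR t1, undef, undef, undef
//   t3: v8i16 = bitcast t2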
EVT NewInVT;
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
if (TLI.isTypeLegal(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
SmallVector<SDValue, 16> Ops(NewNumElts);
SDValue UndefVal = DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i < NewNumElts; ++i)
Ops[i] = UndefVal;
SDValue NewVec;
if (InVT.isVector())
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
else
NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
return CreateStackStoreLoad(InOp, WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
SDLoc dl(N);
// Build a widened vector, filling the new elements with UNDEF.
EVT VT = N->getValueType(0);
// Integer BUILD_VECTOR operands may be larger than the node's vector element
// type. The UNDEFs need to have the same type as the existing operands.
EVT EltVT = N->getOperand(0).getValueType();
unsigned NumElts = VT.getVectorNumElements();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
return DAG.getBuildVector(WidenVT, dl, NewOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
EVT InVT = N->getOperand(0).getValueType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
bool InputWidened = false; // Indicates we need to widen the input.
if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
// Add undef vectors to widen to the correct length.
unsigned NumConcat = WidenVT.getVectorNumElements() /
InVT.getVectorNumElements();
SDValue UndefVal = DAG.getUNDEF(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
for (unsigned i = NumOperands; i != NumConcat; ++i)
Ops[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
}
} else {
InputWidened = true;
if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
// The inputs and the result widen to the same type.
unsigned i;
for (i=1; i < NumOperands; ++i)
if (!N->getOperand(i).isUndef())
break;
if (i == NumOperands)
// Everything but the first operand is an UNDEF so just return the
// widened first operand.
return GetWidenedVector(N->getOperand(0));
if (NumOperands == 2) {
// Replace concat of two operands with a shuffle.
SmallVector<int, 16> MaskOps(WidenNumElts, -1);
for (unsigned i = 0; i < NumInElts; ++i) {
MaskOps[i] = i;
MaskOps[i + NumInElts] = i + WidenNumElts;
}
return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)),
MaskOps);
}
}
}
// Fall back to using extracts and a BUILD_VECTOR.
EVT EltVT = WidenVT.getVectorElementType();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Idx = 0;
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
if (InputWidened)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
// Check if we can just return the input vector after widening.
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
// Check if we can extract from the vector.
unsigned InNumElts = InVT.getVectorNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i=0; i < NumElts; ++i)
Ops[i] =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(IdxVal + i, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
InOp.getValueType(), InOp,
N->getOperand(1), N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
SDValue Result;
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
if (ExtType != ISD::NON_EXTLOAD)
Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
else
Result = GenWidenVectorLoads(LdChain, LD);
// If we generate a single load, we can use that for the chain. Otherwise,
// build a factor node to remember the multiple loads are independent and
// chain to that.
SDValue NewChain;
if (LdChain.size() == 1)
NewChain = LdChain[0];
else
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
// Modified the chain - switch anything that used the old chain to use
// the new one.
ReplaceValueWith(SDValue(N, 1), NewChain);
return Result;
}
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getSrc0());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);
else {
EVT BoolVT = getSetCCResultType(WidenVT);
// We can't use ModifyToType() here because the extra mask lanes must be
// filled with zeroes rather than undef.
unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType,
N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
SDValue Src0 = GetWidenedVector(N->getValue());
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well
Mask = WidenTargetBoolean(Mask, WideVT, true);
// Widen the Index operand
SDValue Index = N->getIndex();
EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
WidenVT, N->getOperand(0));
}
// Return true if this is a node that could have two SETCCs as operands.
static inline bool isLogicalMaskOp(unsigned Opcode) {
switch (Opcode) {
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return true;
}
return false;
}
// This is used just for the assert in convertMask(). Check that this is
// either a SETCC or a SETCC already handled by convertMask().
#ifndef NDEBUG
static inline bool isSETCCorConvertedSETCC(SDValue N) {
if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
N = N.getOperand(0);
else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
for (unsigned i = 1; i < N->getNumOperands(); ++i)
if (!N->getOperand(i)->isUndef())
return false;
N = N.getOperand(0);
}
if (N.getOpcode() == ISD::TRUNCATE)
N = N.getOperand(0);
else if (N.getOpcode() == ISD::SIGN_EXTEND)
N = N.getOperand(0);
if (isLogicalMaskOp(N.getOpcode()))
return isSETCCorConvertedSETCC(N.getOperand(0)) &&
isSETCCorConvertedSETCC(N.getOperand(1));
return (N.getOpcode() == ISD::SETCC ||
ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif
// Return a mask of vector type MaskVT to replace InMask. Also adjust the new
// mask to ToMaskVT if needed with a vector extension or truncation.
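// Illustrative trace (hypothetical types): with MaskVT = v4i16 and
// ToMaskVT = v8i32, the rebuilt mask is sign-extended element-wise and
// then padded with undef up to the target element count:
//   t0: v4i16 = setcc ...
//   t1: v4i32 = sign_extend t0
//   t2: v8i32 = concat_vectors t1, undef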
SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
EVT ToMaskVT) {
// Currently only a SETCC, or an AND/OR/XOR of two SETCCs, is handled.
// FIXME: This code seems to be too restrictive, we might consider
// generalizing it or dropping it.
assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
// Make a new Mask node, with a legal result VT.
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
Ops.push_back(InMask->getOperand(i));
SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
// If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
// extend or truncate is needed.
LLVMContext &Ctx = *DAG.getContext();
unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
if (MaskScalarBits < ToMaskScalBits) {
EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
MaskVT.getVectorNumElements());
Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
} else if (MaskScalarBits > ToMaskScalBits) {
EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
MaskVT.getVectorNumElements());
Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
}
assert(Mask->getValueType(0).getScalarSizeInBits() ==
ToMaskVT.getScalarSizeInBits() &&
"Mask should have the right element size by now.");
// Adjust Mask to the right number of elements.
unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
ZeroIdx);
} else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
EVT SubVT = Mask->getValueType(0);
SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
SubConcatOps[0] = Mask;
for (unsigned i = 1; i < NumSubVecs; ++i)
SubConcatOps[i] = DAG.getUNDEF(SubVT);
Mask =
DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
}
assert((Mask->getValueType(0) == ToMaskVT) &&
"A mask of ToMaskVT should have been produced by now.");
return Mask;
}
// Get the target mask VT, and widen if needed.
EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
assert(SetCC->getOpcode() == ISD::SETCC);
LLVMContext &Ctx = *DAG.getContext();
EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
return MaskVT;
}
// This method tries to handle VSELECT and its mask by legalizing operands
// (which may require widening) and if needed adjusting the mask vector type
// to match that of the VSELECT. Without it, many cases end up with
// scalarization of the SETCC, with many unnecessary instructions.
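// Illustrative trace (hypothetical types; assume v2f32/v2i32 widen to
// v4f32/v4i32 and compares produce integer-element masks):
//   t0: v2f32 = vselect (setcc v2i32 a, b), t1, t2
// widens to
//   m:  v4i32 = setcc a, b           // mask rebuilt at the widened VT
//   t3: v4f32 = vselect m, t1', t2'  // t1', t2' are the widened operands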
SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
LLVMContext &Ctx = *DAG.getContext();
SDValue Cond = N->getOperand(0);
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
return SDValue();
// If this is a split VSELECT that was previously already handled, do
// nothing.
if (Cond->getValueType(0).getScalarSizeInBits() != 1)
return SDValue();
EVT VSelVT = N->getValueType(0);
// Only handle vector types whose total size in bits is a power of 2.
if (!isPowerOf2_64(VSelVT.getSizeInBits()))
return SDValue();
// Don't touch if this will be scalarized.
EVT FinalVT = VSelVT;
while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);
if (FinalVT.getVectorNumElements() == 1)
return SDValue();
// If there is support for an i1 vector mask, don't touch.
if (Cond.getOpcode() == ISD::SETCC) {
EVT SetCCOpVT = Cond->getOperand(0).getValueType();
while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
if (SetCCResVT.getScalarSizeInBits() == 1)
return SDValue();
}
// Get the VT and operands for VSELECT, and widen if needed.
SDValue VSelOp1 = N->getOperand(1);
SDValue VSelOp2 = N->getOperand(2);
if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
VSelOp1 = GetWidenedVector(VSelOp1);
VSelOp2 = GetWidenedVector(VSelOp2);
}
// The mask of the VSELECT should have integer elements.
EVT ToMaskVT = VSelVT;
if (!ToMaskVT.getScalarType().isInteger())
ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
SDValue Mask;
if (Cond->getOpcode() == ISD::SETCC) {
EVT MaskVT = getSETCCWidenedResultTy(Cond);
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
Cond->getOperand(0).getOpcode() == ISD::SETCC &&
Cond->getOperand(1).getOpcode() == ISD::SETCC) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
EVT VT0 = getSETCCWidenedResultTy(SETCC0);
EVT VT1 = getSETCCWidenedResultTy(SETCC1);
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
EVT MaskVT;
// If the two SETCCs have different VTs, either extend/truncate one of
// them to the other "towards" ToMaskVT, or truncate one and extend the
// other to ToMaskVT.
if (ScalarBits0 != ScalarBits1) {
EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
MaskVT = WideVT;
else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
MaskVT = NarrowVT;
else
MaskVT = ToMaskVT;
} else
// If the two SETCCs have the same VT, don't change it.
MaskVT = VT0;
// Make new SETCCs and logical nodes.
SETCC0 = convertMask(SETCC0, VT0, MaskVT);
SETCC1 = convertMask(SETCC1, VT1, MaskVT);
Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
// Convert the logical op for VSELECT if needed.
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else
return SDValue();
return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
}
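// A worked example (illustrative sketch, assuming a target whose vector
// SETCC produces integer-element masks rather than i1 vectors): for
//   vselect (setcc a, b : v4i32), x, y : v4i16
// the natural mask type v4i32 does not match the v4i16 VSELECT type, so the
// SETCC is rebuilt at v4i32 and truncated by convertMask to v4i16, letting
// the VSELECT survive legalization instead of being scalarized.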
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
if (SDValue Res = WidenVSELECTAndMask(N))
return Res;
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
// If we have to split the condition, there is no point in widening the
// select. This would result in a cycle of widening the select ->
// widening the condition operand -> splitting the condition operand ->
// splitting the select -> widening the select. Instead, split this select
// further and widen the resulting type.
if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
SDValue Res = ModifyToType(SplitSelect, WidenVT);
return Res;
}
if (Cond1.getValueType() != CondWidenVT)
Cond1 = ModifyToType(Cond1, CondWidenVT);
}
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
return DAG.getNode(N->getOpcode(), SDLoc(N),
WidenVT, Cond1, InOp1, InOp2);
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(2));
SDValue InOp2 = GetWidenedVector(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
InOp1.getValueType(), N->getOperand(0),
N->getOperand(1), InOp1, InOp2, N->getOperand(4));
}
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() ==
N->getOperand(0).getValueType().isVector() &&
"Scalar/Vector type mismatch");
if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
InOp1, InOp2, N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getUNDEF(WidenVT);
}
SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
unsigned NumElts = VT.getVectorNumElements();
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
// Adjust mask based on new input vector length.
SmallVector<int, 16> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = N->getMaskElt(i);
if (Idx < (int)NumElts)
NewMask.push_back(Idx);
else
NewMask.push_back(Idx - NumElts + WidenNumElts);
}
for (unsigned i = NumElts; i != WidenNumElts; ++i)
NewMask.push_back(-1);
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
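// A worked example (illustrative sketch): widening a v2i32 shuffle to v4i32.
// With NumElts = 2 and WidenNumElts = 4, a mask element 0 is kept as-is (it
// indexes the first input), a mask element 3 indexes the second input and is
// rebased to 3 - 2 + 4 = 5, and the tail is padded with -1 (undef):
//   v2i32 mask <0, 3>  -->  v4i32 mask <0, 5, -1, -1>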
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "cannot widen non-vector type");
EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), WidenNumElts);
// The input and output types often differ here, and it could be that while
// we'd prefer to widen the result type, the input operands have been split.
// In this case, we also need to split the result of this node as well.
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
return Res;
}
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
// Assume that the input and output will be widened appropriately. If not,
// we will have to unroll it at some point.
assert(InOp1.getValueType() == WidenInVT &&
InOp2.getValueType() == WidenInVT &&
"Input not widened to expected type!");
(void)WidenInVT;
return DAG.getNode(ISD::SETCC, SDLoc(N),
WidenVT, InOp1, InOp2, N->getOperand(2));
}
//===----------------------------------------------------------------------===//
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom widen this node.
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
Res = WidenVecOp_EXTEND(N);
break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
}
// If Res is null, the sub-method took care of registering the result.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue InOp = N->getOperand(0);
// If some legalization strategy other than widening is used on the operand,
// we can't safely assume that just extending the low lanes is the correct
// transformation.
if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
return WidenVecOp_Convert(N);
InOp = GetWidenedVector(InOp);
assert(VT.getVectorNumElements() <
InOp.getValueType().getVectorNumElements() &&
"Input wasn't widened!");
// We may need to further widen the operand until it has the same total
// vector size as the result.
EVT InVT = InOp.getValueType();
if (InVT.getSizeInBits() != VT.getSizeInBits()) {
EVT InEltVT = InVT.getVectorElementType();
for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE;
i < e; ++i) {
EVT FixedVT = (MVT::SimpleValueType)i;
EVT FixedEltVT = FixedVT.getVectorElementType();
if (TLI.isTypeLegal(FixedVT) &&
FixedVT.getSizeInBits() == VT.getSizeInBits() &&
FixedEltVT == InEltVT) {
assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
"Not enough elements in the fixed type for the operand!");
assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
"We can't have the same type as we started with!");
if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
InOp = DAG.getNode(
ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
else
InOp = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
break;
}
}
InVT = InOp.getValueType();
if (InVT.getSizeInBits() != VT.getSizeInBits())
// We couldn't find a legal vector type that was a widening of the input
// and could be extended in-register to the result type, so we have to
// scalarize.
return WidenVecOp_Convert(N);
}
// Use special DAG nodes to represent the operation of extending the
// low lanes.
switch (N->getOpcode()) {
default:
llvm_unreachable("Extend legalization on on extend operation!");
case ISD::ANY_EXTEND:
return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
case ISD::SIGN_EXTEND:
return DAG.getSignExtendVectorInReg(InOp, DL, VT);
case ISD::ZERO_EXTEND:
return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
}
}
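// A worked example (illustrative sketch): zero-extending v2i32 -> v2i64
// where the operand was widened to v4i32. The widened input already has the
// result's total size (128 bits), so the fix-up loop is skipped and a single
// zero_extend_vector_inreg node extends just the two low lanes, ignoring the
// widening garbage in the high lanes.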
SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
// The result (and first input) is legal, but the second input is illegal.
// We can't do much to fix that, so just unroll and let the extracts off of
// the second input be widened as needed later.
return DAG.UnrollVectorOp(N);
}
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely that we
// can fix the input to a legal type so unroll the convert into some scalar
// code and create a nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
unsigned Opcode = N->getOpcode();
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
Opcode, dl, EltVT,
DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
EVT InWidenVT = InOp.getValueType();
SDLoc dl(N);
// Check if we can convert between two legal vector types and extract.
unsigned InWidenSize = InWidenVT.getSizeInBits();
unsigned Size = VT.getSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
}
return CreateStackStoreLoad(InOp, VT);
}
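// A worked example (illustrative sketch): bitcasting v2i32 -> i64 when the
// operand was widened to v4i32. InWidenSize = 128 is divisible by the 64-bit
// result, so if v2i64 is legal on the target the input is bitcast to v2i64
// and element 0 is extracted, avoiding the stack store/load fallback.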
SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
// If the input vector is not legal, it is likely that we will not find a
// legal vector of the same size. Replace the concatenate vector with a
// nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(NumElts);
EVT InVT = N->getOperand(0).getValueType();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned Idx = 0;
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
N->getValueType(0), InOp, N->getOperand(1));
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
N->getValueType(0), InOp, N->getOperand(1));
}
SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// We have to widen the value, but we want only to store the original
// vector type.
StoreSDNode *ST = cast<StoreSDNode>(N);
SmallVector<SDValue, 16> StChain;
if (ST->isTruncatingStore())
GenWidenVectorTruncStores(StChain, ST);
else
GenWidenVectorStores(StChain, ST);
if (StChain.size() == 1)
return StChain[0];
else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
SDValue StVal = MST->getValue();
// Widen the value
SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);
else {
// The mask should be widened as well.
EVT BoolVT = getSetCCResultType(WideVal.getValueType());
// We can't use ModifyToType() because we should fill the mask with
// zeroes.
unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
assert(Mask.getValueType().getVectorNumElements() ==
WideVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
false, MST->isCompressingStore());
}
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "Can widen only data operand of mscatter");
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
// Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
EVT WideVT = WideVal.getValueType();
unsigned NumElts = WideVal.getValueType().getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well.
Mask = WidenTargetBoolean(Mask, WideVT, true);
// Widen index.
SDValue Index = MSC->getIndex();
EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), dl, Ops,
MSC->getMemOperand());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDLoc dl(N);
// WARNING: In this code we widen the compare instruction with garbage.
// This garbage may contain denormal floats which may be slow. Is this a real
// concern? Should we zero the unused lanes if this is a float compare?
// Get a new SETCC node to compare the newly widened operands.
// Only some of the compared elements are legal.
EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
InOp0.getValueType());
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
SVT, InOp0, InOp1, N->getOperand(2));
// Extract the needed results from the result vector.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
N->getValueType(0).getVectorNumElements());
SDValue CC = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
return PromoteTargetBoolean(CC, N->getValueType(0));
}
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
// Utility function to find the type to chop up a widened vector for
// load/store.
// TLI: Target lowering used to determine legal types.
// Width: Width left to load/store (in bits).
// WidenVT: The widened vector type to load to / store from.
// Align: If 0, don't allow use of a wider type.
// WidenEx: If Align is not 0, the additional amount we may load/store.
static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned Width, EVT WidenVT,
unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
unsigned WidenWidth = WidenVT.getSizeInBits();
unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
unsigned AlignInBits = Align*8;
// If we have one element to load/store, return it.
EVT RetVT = WidenEltVT;
if (Width == WidenEltWidth)
return RetVT;
// See if there is larger legal integer than the element type to load/store.
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
EVT MemVT((MVT::SimpleValueType) VT);
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
if ((Action == TargetLowering::TypeLegal ||
Action == TargetLowering::TypePromoteInteger) &&
(WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
break;
}
}
// See if there is a larger vector type to load/store that has the same vector
// element type and whose size evenly divides the size of WidenVT.
for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
return MemVT;
}
}
return RetVT;
}
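// A worked example (illustrative sketch; the answers depend on the target's
// legal types): FindMemType(Width = 96, WidenVT = v4i32) on a typical 64-bit
// target. v4i32 itself is 128 bits and too wide; the integer scan finds i64
// (64 <= 96, and 128/64 is a power of two), so i64 is returned. A follow-up
// query with Width = 32 then yields i32 for the remaining tail.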
// Builds a vector from scalar loads.
// VecTy: Resulting vector type.
// LdOps: Load operators to build the vector from.
// [Start,End): The range of loads to use.
static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
SmallVectorImpl<SDValue> &LdOps,
unsigned Start, unsigned End) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(LdOps[Start]);
EVT LdTy = LdOps[Start].getValueType();
unsigned Width = VecTy.getSizeInBits();
unsigned NumElts = Width / LdTy.getSizeInBits();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
unsigned Idx = 1;
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
for (unsigned i = Start + 1; i != End; ++i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
// Readjust the insert position based on the new load type.
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
}
VecOp = DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
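// A worked example (illustrative sketch) continuing the 96-bit case above:
// with LdOps = { i64, i32 } and VecTy = v4i32, the i64 load is placed via
// SCALAR_TO_VECTOR into a v2i64. On reaching the i32 load the accumulator is
// bitcast to v4i32 and the insert index is rescaled from 1 to 1 * 64 / 32 = 2,
// so the i32 lands in lane 2; the final bitcast to VecTy is then a no-op.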
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
// type.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
Align, MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
if (LdWidth <= NewVTWidth) {
if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
if (NewVT == WidenVT)
return LdOp;
assert(WidenWidth % NewVTWidth == 0);
unsigned NumConcat = WidenWidth / NewVTWidth;
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
}
// Load vector by using multiple loads from largest vector to scalar.
SmallVector<SDValue, 16> LdOps;
LdOps.push_back(LdOp);
LdWidth -= NewVTWidth;
unsigned Offset = 0;
while (LdWidth > 0) {
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl, BasePtr.getValueType()));
SDValue L;
if (LdWidth < NewVTWidth) {
// The current type we are using is too large. Find a better size.
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
// Later code assumes the vector loads produced will be mergeable, so we
// must pad the final entry up to the previous width. Scalars are
// combined separately.
SmallVector<SDValue, 16> Loads;
Loads.push_back(L);
unsigned size = L->getValueSizeInBits(0);
while (size < LdOp->getValueSizeInBits(0)) {
Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
size += L->getValueSizeInBits(0);
}
L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
}
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
}
LdOps.push_back(L);
LdWidth -= NewVTWidth;
}
// Build the vector from the load operations.
unsigned End = LdOps.size();
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
// If the loads contain vectors, build the result using CONCAT_VECTORS.
// All of the vectors used to load are power-of-2 width, and the scalar loads
// can be combined to make a power-of-2 width vector.
SmallVector<SDValue, 16> ConcatOps(End);
int i = End - 1;
int Idx = End;
EVT LdTy = LdOps[i].getValueType();
// First, combine the scalar loads to a vector.
if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
LdTy = LdOps[i].getValueType();
if (LdTy.isVector())
break;
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
}
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
makeArrayRef(&ConcatOps[Idx], End - Idx));
Idx = End - 1;
LdTy = NewLdTy;
}
ConcatOps[--Idx] = LdOps[i];
}
if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
makeArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
unsigned i = 0;
for (; i != End-Idx; ++i)
WidenOps[i] = ConcatOps[Idx+i];
for (; i != NumOps; ++i)
WidenOps[i] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
}
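// A worked example (illustrative sketch): loading a v3i32 that widens to
// v4i32. LdWidth starts at 96 bits; a first i64 load covers elements 0-1, a
// second i32 load covers element 2 (assuming alignment does not allow a
// wider tail load), and BuildVectorFromScalar reassembles the two scalars
// into a v4i32 whose lane 3 is undef.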
SDValue
DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD,
ISD::LoadExtType ExtType) {
// For extension loads, it may not be more efficient to chop up the vector
// and then extend it. Instead, we unroll the load and build a new vector.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();
// Load each element and widen.
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] =
DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
LdEltVT, Align, MMOFlags, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr,
DAG.getConstant(Offset, dl,
BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
Align, MMOFlags, AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
// Fill the rest with undefs.
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
}
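// A worked example (illustrative sketch): a sign-extending load with memory
// type v2i8 whose result widens to v4i32. The loop issues two i8 -> i32
// extending scalar loads at byte offsets 0 and 1, fills lanes 2 and 3 with
// undef, and builds the v4i32 result, rather than loading the vector whole
// and extending it afterwards.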
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
EVT ValVT = ValOp.getValueType();
unsigned ValWidth = ValVT.getSizeInBits();
EVT ValEltVT = ValVT.getVectorElementType();
unsigned ValEltWidth = ValEltVT.getSizeInBits();
assert(StVT.getVectorElementType() == ValEltVT);
int Idx = 0; // current index to store
unsigned Offset = 0; // offset from base to store
while (StWidth != 0) {
// Find the largest vector type we can store with.
EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
unsigned NewVTWidth = NewVT.getSizeInBits();
unsigned Increment = NewVTWidth / 8;
if (NewVT.isVector()) {
unsigned NumVTElts = NewVT.getVectorNumElements();
do {
SDValue EOp = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl,
BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type.
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getConstant(Idx++, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl,
BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth / ValEltWidth;
}
}
}
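// A worked example (illustrative sketch, assuming i64 and i32 are the legal
// choices): storing a v3i32 whose value operand was widened to v4i32.
// FindMemType first returns i64, so the value is bitcast to v2i64 and lane 0
// is stored, covering elements 0-1; 32 bits remain, FindMemType returns i32,
// and element 2 is stored at offset 8. Element 3 (widening garbage) is never
// written to memory.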
void
DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// For truncating stores, it may not be more efficient to truncate the vector
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
// It must be true that the wide vector type is bigger than the memory type
// we need to store.
assert(StVT.isVector() && ValOp.getValueType().isVector());
assert(StVT.bitsLT(ValOp.getValueType()));
// For truncating stores, we cannot play the trick of chopping into legal
// vector types and bitcasting to the right type. Instead, we unroll the
// store.
EVT StEltVT = StVT.getVectorElementType();
EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
ST->getPointerInfo(), StEltVT, Align,
MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr,
DAG.getConstant(Offset, dl,
BasePtr.getValueType()));
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(
Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
}
}
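// A worked example (illustrative sketch): a truncating store with memory
// type v3i16 whose i32-element value operand was widened to v4i32. The
// chopping tricks above do not apply, so the three stored elements are
// extracted one by one and written with i32 -> i16 truncating scalar stores;
// the caller then chains the pieces together with a TokenFactor.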
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// FillWithZeroes specifies that the vector should be widened with zeroes.
SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
bool FillWithZeroes) {
// Note that InOp might have been widened so it might already have
// the right width or it might need to be narrowed.
EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
SDLoc dl(InOp);
// Check if InOp already has the right width.
if (InVT == NVT)
return InOp;
unsigned InNumElts = InVT.getVectorNumElements();
unsigned WidenNumElts = NVT.getVectorNumElements();
if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
unsigned NumConcat = WidenNumElts / InNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
DAG.getUNDEF(InVT);
Ops[0] = InOp;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = FillVal;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
Ops[Idx] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
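// A worked example (illustrative sketch): ModifyToType(v2i32 mask, NVT =
// v4i32, FillWithZeroes = true). WidenNumElts = 4 is a multiple of
// InNumElts = 2, so the result is concat_vectors(Mask, zeroinitializer),
// and the two extra mask lanes read as "off" instead of undefined garbage.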
Index: head/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- head/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (revision 322854)
+++ head/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (revision 322855)
@@ -1,7958 +1,7961 @@
//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAG class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <set>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
SDVTList Res = {VTs, NumVTs};
return Res;
}
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
//===----------------------------------------------------------------------===//
/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
/// two floating point values.
bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
return getValueAPF().bitwiseIsEqual(V);
}
bool ConstantFPSDNode::isValueValidForType(EVT VT,
const APFloat& Val) {
assert(VT.isFloatingPoint() && "Can only convert between FP types");
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
(void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
//===----------------------------------------------------------------------===//
// ISD Namespace
//===----------------------------------------------------------------------===//
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
+ bool AllowShrink) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
APInt SplatUndef;
unsigned SplatBitSize;
bool HasUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize;
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+ MinSplatBits) &&
+ EltSize >= SplatBitSize;
}
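// A worked example (illustrative sketch) for the new AllowShrink flag: a
// v4i32 BUILD_VECTOR whose elements are all 0x01010101 is also an 8-bit
// splat of 1. With AllowShrink set, MinSplatBits is 0 and the caller may
// receive that narrower 8-bit splat; with it clear, the splat must be at
// least the 32-bit element size, so SplatVal comes back as 0x01010101.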
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// specializations of the more general isConstantSplatVector()?
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
unsigned i = 0, e = N->getNumOperands();
// Skip over all of the undef values.
while (i != e && N->getOperand(i).isUndef())
++i;
// Do not accept an all-undef vector.
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
// elements. We have to be a bit careful here, as the type of the constant
// may not be the same as the type of the vector elements due to type
// legalization (the elements are promoted to a legal type for the target and
// a vector of a type may be legal when the base element type is not).
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all ones, not whether the individual
// constants are.
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
// undefs. Even with the above element type twiddling, this should be OK, as
// the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
return false;
return true;
}
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
bool IsAllUndef = true;
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
IsAllUndef = false;
// Do not accept build_vectors that aren't all constants or which have non-0
// elements. We have to be a bit careful here, as the type of the constant
// may not be the same as the type of the vector elements due to type
// legalization (the elements are promoted to a legal type for the target
// and a vector of a type may be legal when the base element type is not).
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all zeros, not whether the individual
// constants are.
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
if (CN->getAPIntValue().countTrailingZeros() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
return false;
} else
return false;
}
// Do not accept an all-undef vector.
if (IsAllUndef)
return false;
return true;
}
bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
if (!isa<ConstantSDNode>(Op))
return false;
}
return true;
}
bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
if (!isa<ConstantFPSDNode>(Op))
return false;
}
return true;
}
bool ISD::allOperandsUndef(const SDNode *N) {
// Return false if the node has no operands.
// This is "logically inconsistent" with the definition of "all" but
// is probably the desired behavior.
if (N->getNumOperands() == 0)
return false;
for (const SDValue &Op : N->op_values())
if (!Op.isUndef())
return false;
return true;
}
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:
return ISD::ZERO_EXTEND;
default:
break;
}
llvm_unreachable("Invalid LoadExtType");
}
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
// To perform this operation, we just need to swap the L and G bits of the
// operation.
unsigned OldL = (Operation >> 2) & 1;
unsigned OldG = (Operation >> 1) & 1;
return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
(OldL << 1) | // New G bit
(OldG << 2)); // New L bit.
}
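// A worked example (illustrative sketch): SETLT has L = 1 and G = 0; swapping
// the two bits yields SETGT, matching the identity (a < b) == (b > a).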
ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
unsigned Operation = Op;
if (isInteger)
Operation ^= 7; // Flip L, G, E bits, but not U.
else
Operation ^= 15; // Flip all of the condition bits.
if (Operation > ISD::SETTRUE2)
Operation &= ~8; // Don't let N and U bits get set.
return ISD::CondCode(Operation);
}
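// A worked example (illustrative sketch): inverting the integer condition
// SETLT flips the L, G, and E bits and produces SETGE, i.e.
// !(a < b) == (a >= b). The U bit is additionally flipped only for
// floating-point conditions, where inversion must also toggle how unordered
// inputs compare.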
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if it is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
static int isSignedOp(ISD::CondCode Opcode) {
switch (Opcode) {
default: llvm_unreachable("Illegal integer setcc operation!");
case ISD::SETEQ:
case ISD::SETNE: return 0;
case ISD::SETLT:
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE: return 1;
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE: return 2;
}
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
bool IsInteger) {
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
unsigned Op = Op1 | Op2; // Combine all of the condition bits.
// If the N and U bits get set, then the resultant comparison DOES suddenly
// care about orderedness, and it is true when ordered.
if (Op > ISD::SETTRUE2)
Op &= ~16; // Clear the U bit if the N bit is set.
// Canonicalize illegal integer setcc's.
if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
Op = ISD::SETNE;
return ISD::CondCode(Op);
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
bool IsInteger) {
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
// Combine all of the condition bits.
ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
// Canonicalize illegal integer setcc's.
if (IsInteger) {
switch (Result) {
default: break;
case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
case ISD::SETOEQ: // SETEQ & SETU[LG]E
case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
}
}
return Result;
}
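// A worked example (illustrative sketch): OR-combining the integer conditions
// SETUGT (U|G) and SETULT (U|L) produces SETUNE (U|L|G), which the integer
// canonicalization rewrites to SETNE. AND-combining SETUGE (U|G|E) and
// SETULE (U|L|E) keeps only U|E, i.e. SETUEQ, canonicalized to SETEQ.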
//===----------------------------------------------------------------------===//
// SDNode Profile Support
//===----------------------------------------------------------------------===//
/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
ID.AddInteger(OpC);
}
/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
/// solely with their pointer.
static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
ID.AddPointer(VTList.VTs);
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDValue> Ops) {
for (auto& Op : Ops) {
ID.AddPointer(Op.getNode());
ID.AddInteger(Op.getResNo());
}
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDUse> Ops) {
for (auto& Op : Ops) {
ID.AddPointer(Op.getNode());
ID.AddInteger(Op.getResNo());
}
}
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
AddNodeIDValueTypes(ID, VTList);
AddNodeIDOperands(ID, OpList);
}
/// If this is an SDNode with special info, add this info to the NodeID data.
static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
switch (N->getOpcode()) {
case ISD::TargetExternalSymbol:
case ISD::ExternalSymbol:
case ISD::MCSymbol:
llvm_unreachable("Should only be used on nodes with operands");
default: break; // Normal nodes don't need extra info.
case ISD::TargetConstant:
case ISD::Constant: {
const ConstantSDNode *C = cast<ConstantSDNode>(N);
ID.AddPointer(C->getConstantIntValue());
ID.AddBoolean(C->isOpaque());
break;
}
case ISD::TargetConstantFP:
case ISD::ConstantFP:
ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
break;
case ISD::TargetGlobalAddress:
case ISD::GlobalAddress:
case ISD::TargetGlobalTLSAddress:
case ISD::GlobalTLSAddress: {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
break;
}
case ISD::BasicBlock:
ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
break;
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
case ISD::RegisterMask:
ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
break;
case ISD::ConstantPool:
case ISD::TargetConstantPool: {
const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
ID.AddInteger(CP->getAlignment());
ID.AddInteger(CP->getOffset());
if (CP->isMachineConstantPoolEntry())
CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
else
ID.AddPointer(CP->getConstVal());
ID.AddInteger(CP->getTargetFlags());
break;
}
case ISD::TargetIndex: {
const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
ID.AddInteger(TI->getIndex());
ID.AddInteger(TI->getOffset());
ID.AddInteger(TI->getTargetFlags());
break;
}
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
ID.AddInteger(LD->getPointerInfo().getAddrSpace());
break;
}
case ISD::STORE: {
const StoreSDNode *ST = cast<StoreSDNode>(N);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
ID.AddInteger(AT->getPointerInfo().getAddrSpace());
break;
}
case ISD::PREFETCH: {
const MemSDNode *PF = cast<MemSDNode>(N);
ID.AddInteger(PF->getPointerInfo().getAddrSpace());
break;
}
case ISD::VECTOR_SHUFFLE: {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
i != e; ++i)
ID.AddInteger(SVN->getMaskElt(i));
break;
}
case ISD::TargetBlockAddress:
case ISD::BlockAddress: {
const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
ID.AddPointer(BA->getBlockAddress());
ID.AddInteger(BA->getOffset());
ID.AddInteger(BA->getTargetFlags());
break;
}
} // end switch (N->getOpcode())
// Target specific memory nodes could also have address spaces to check.
if (N->isTargetMemoryOpcode())
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
}
/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
/// data.
static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
AddNodeIDOpcode(ID, N->getOpcode());
// Add the return value info.
AddNodeIDValueTypes(ID, N->getVTList());
// Add the operand info.
AddNodeIDOperands(ID, N->ops());
// Handle SDNode leaves with special info.
AddNodeIDCustom(ID, N);
}
//===----------------------------------------------------------------------===//
// SelectionDAG Class
//===----------------------------------------------------------------------===//
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
if (N->getValueType(0) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
switch (N->getOpcode()) {
default: break;
case ISD::HANDLENODE:
case ISD::EH_LABEL:
return true; // Never CSE these nodes.
}
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
if (N->getValueType(i) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
return false;
}
/// RemoveDeadNodes - This method deletes all unreachable nodes in the
/// SelectionDAG.
void SelectionDAG::RemoveDeadNodes() {
// Create a dummy node (which is not added to allnodes) that adds a reference
// to the root node, preventing it from being deleted.
HandleSDNode Dummy(getRoot());
SmallVector<SDNode*, 128> DeadNodes;
// Add all obviously-dead nodes to the DeadNodes worklist.
for (SDNode &Node : allnodes())
if (Node.use_empty())
DeadNodes.push_back(&Node);
RemoveDeadNodes(DeadNodes);
// If the root changed (e.g. it was a dead load), update the root.
setRoot(Dummy.getValue());
}
/// RemoveDeadNodes - This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
// Process the worklist, deleting the nodes and adding their uses to the
// worklist.
while (!DeadNodes.empty()) {
SDNode *N = DeadNodes.pop_back_val();
// Skip to the next node if we've already managed to delete the node. This
// could happen if replacing a node causes a node previously added to the
// worklist to be deleted.
if (N->getOpcode() == ISD::DELETED_NODE)
continue;
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeDeleted(N, nullptr);
// Take the node out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
// Next, brutally remove the operand list. This is safe to do, as there are
// no cycles in the graph.
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
SDUse &Use = *I++;
SDNode *Operand = Use.getNode();
Use.set(SDValue());
// Now that we removed this operand, see if there are no uses of it left.
if (Operand->use_empty())
DeadNodes.push_back(Operand);
}
DeallocateNode(N);
}
}
void SelectionDAG::RemoveDeadNode(SDNode *N){
SmallVector<SDNode*, 16> DeadNodes(1, N);
// Create a dummy node that adds a reference to the root node, preventing
// it from being deleted. (This matters if the root is an operand of the
// dead node.)
HandleSDNode Dummy(getRoot());
RemoveDeadNodes(DeadNodes);
}
void SelectionDAG::DeleteNode(SDNode *N) {
// First take this out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
// Finally, remove uses due to operands of this node, remove from the
// AllNodes list, and delete the node.
DeleteNodeNotInCSEMaps(N);
}
void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
assert(N->getIterator() != AllNodes.begin() &&
"Cannot delete the entry node!");
assert(N->use_empty() && "Cannot delete a node that is not dead!");
// Drop all of the operands and decrement used node's use counts.
N->DropOperands();
DeallocateNode(N);
}
void SDDbgInfo::erase(const SDNode *Node) {
DbgValMapType::iterator I = DbgValMap.find(Node);
if (I == DbgValMap.end())
return;
for (auto &Val: I->second)
Val->setIsInvalidated();
DbgValMap.erase(I);
}
void SelectionDAG::DeallocateNode(SDNode *N) {
// If we have operands, deallocate them.
removeOperands(N);
NodeAllocator.Deallocate(AllNodes.remove(N));
// Set the opcode to DELETED_NODE to help catch bugs when node
// memory is reallocated.
// FIXME: There are places in SDag that have grown a dependency on the opcode
// value in the released node.
__asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
N->NodeType = ISD::DELETED_NODE;
// If any of the SDDbgValue nodes refer to this SDNode, invalidate
// them and forget about that node.
DbgInfo->erase(N);
}
#ifndef NDEBUG
/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
static void VerifySDNode(SDNode *N) {
switch (N->getOpcode()) {
default:
break;
case ISD::BUILD_PAIR: {
EVT VT = N->getValueType(0);
assert(N->getNumValues() == 1 && "Too many results!");
assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
"Wrong return type!");
assert(N->getNumOperands() == 2 && "Wrong number of operands!");
assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
"Mismatched operand types!");
assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
"Wrong operand type!");
assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
"Wrong return type size");
break;
}
case ISD::BUILD_VECTOR: {
assert(N->getNumValues() == 1 && "Too many results!");
assert(N->getValueType(0).isVector() && "Wrong return type!");
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!");
EVT EltVT = N->getValueType(0).getVectorElementType();
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
assert(I->getValueType() == N->getOperand(0).getValueType() &&
"Operands must all have the same type");
}
break;
}
}
}
#endif // NDEBUG
/// \brief Insert a newly allocated node into the DAG.
///
/// Handles insertion into the all nodes list and CSE map, as well as
/// verification and other common operations when a new node is allocated.
void SelectionDAG::InsertNode(SDNode *N) {
AllNodes.push_back(N);
#ifndef NDEBUG
N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
/// corresponds to it. This is useful when we're about to delete or repurpose
/// the node. We don't want future requests for structurally identical nodes
/// to return N anymore.
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
"Cond code doesn't exist!");
Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
break;
case ISD::ExternalSymbol:
Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
break;
case ISD::TargetExternalSymbol: {
ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
Erased = TargetExternalSymbols.erase(
std::pair<std::string,unsigned char>(ESN->getSymbol(),
ESN->getTargetFlags()));
break;
}
case ISD::MCSymbol: {
auto *MCSN = cast<MCSymbolSDNode>(N);
Erased = MCSymbols.erase(MCSN->getMCSymbol());
break;
}
case ISD::VALUETYPE: {
EVT VT = cast<VTSDNode>(N)->getVT();
if (VT.isExtended()) {
Erased = ExtendedValueTypeNodes.erase(VT);
} else {
Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
}
break;
}
default:
// Remove it from the CSE Map.
assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
Erased = CSEMap.RemoveNode(N);
break;
}
#ifndef NDEBUG
// Verify that the node was actually in one of the CSE maps, unless it has a
// flag result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
dbgs() << "\n";
llvm_unreachable("Node is not in map!");
}
#endif
return Erased;
}
/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
/// maps and modified in place. Add it back to the CSE maps, unless an identical
/// node already exists, in which case transfer all its users to the existing
/// node. This transfer can potentially trigger recursive merging.
void
SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
// already exists.
if (!doNotCSE(N)) {
SDNode *Existing = CSEMap.GetOrInsertNode(N);
if (Existing != N) {
// If there was already an existing matching node, use ReplaceAllUsesWith
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
ReplaceAllUsesWith(N, Existing);
// N is now dead. Inform the listeners and delete it.
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeDeleted(N, Existing);
DeleteNodeNotInCSEMaps(N);
return;
}
}
// If the node doesn't already exist, we updated it. Inform listeners.
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeUpdated(N);
}
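// Hypothetical merge example for the routine above: with X = (add a, b) and
// Y = (add a, c), rewriting c to b makes Y structurally identical to X, so
// GetOrInsertNode finds X, Y's users are transferred to X via
// ReplaceAllUsesWith, and any user that thereby becomes identical to some
// other node is merged the same way one level up.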
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
void *&InsertPos) {
if (doNotCSE(N))
return nullptr;
SDValue Ops[] = { Op };
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
Node->intersectFlagsWith(N->getFlags());
return Node;
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
SDValue Op1, SDValue Op2,
void *&InsertPos) {
if (doNotCSE(N))
return nullptr;
SDValue Ops[] = { Op1, Op2 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
Node->intersectFlagsWith(N->getFlags());
return Node;
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
void *&InsertPos) {
if (doNotCSE(N))
return nullptr;
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
Node->intersectFlagsWith(N->getFlags());
return Node;
}
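// A minimal sketch of the CSE probe these overloads wrap (the opcode, types
// and operand names here are illustrative assumptions, not fixed API usage):
//
//   FoldingSetNodeID ID;
//   AddNodeIDNode(ID, ISD::ADD, getVTList(MVT::i32), {LHS, RHS});
//   void *IP = nullptr;
//   if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
//     return SDValue(E, 0);        // reuse the structurally identical node
//   // ...otherwise build a new node and CSEMap.InsertNode(N, IP).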
unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
return getDataLayout().getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE) {
MF = &NewMF;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
Context = &MF->getFunction()->getContext();
}
SelectionDAG::~SelectionDAG() {
assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
allnodes_clear();
OperandRecycler.clear(OperandAllocator);
delete DbgInfo;
}
void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
while (!AllNodes.empty())
DeallocateNode(&AllNodes.front());
#ifndef NDEBUG
NextPersistentId = 0;
#endif
}
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
if (N) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant:
case ISD::ConstantFP:
llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
"debug location. Use another overload.");
}
}
return N;
}
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
const SDLoc &DL, void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
if (N) {
switch (N->getOpcode()) {
case ISD::Constant:
case ISD::ConstantFP:
// Erase debug location from the node if the node is used at several
// different places. Do not propagate one location to all uses as it
// will cause a worse single stepping debugging experience.
if (N->getDebugLoc() != DL.getDebugLoc())
N->setDebugLoc(DebugLoc());
break;
default:
// When the node's point of use is located earlier in the instruction
// sequence than its prior point of use, update its debug info to the
// earlier location.
if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
N->setDebugLoc(DL.getDebugLoc());
break;
}
}
return N;
}
void SelectionDAG::clear() {
allnodes_clear();
OperandRecycler.clear(OperandAllocator);
OperandAllocator.Reset();
CSEMap.clear();
ExtendedValueTypeNodes.clear();
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
static_cast<SDNode*>(nullptr));
EntryNode.UseList = nullptr;
InsertNode(&EntryNode);
Root = getEntryNode();
DbgInfo->clear();
}
SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType())
? getNode(ISD::FP_EXTEND, DL, VT, Op)
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
return getNode(ISD::TRUNCATE, SL, VT, Op);
TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
}
SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
assert(!VT.isVector() &&
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
if (Op.getValueType() == VT) return Op;
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
return getNode(ISD::AND, DL, Op.getValueType(), Op,
getConstant(Imm, DL, Op.getValueType()));
}
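// Worked example: zero-extending the low 8 bits of an i32 in-register uses
// APInt::getLowBitsSet(32, 8) == 0x000000FF, so the call above emits
// (and x, 0xFF) rather than a separate truncate/extend pair.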
SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
"The destination vector type must have fewer lanes than the input.");
return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
}
SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
"The destination vector type must have fewer lanes than the input.");
return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
}
SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
"The destination vector type must have fewer lanes than the input.");
return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
}
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue TrueValue;
switch (TLI->getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = getConstant(1, DL, VT);
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
VT);
break;
}
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
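// Example of the boolean-content split above: with ZeroOrOneBooleanContent,
// logical NOT of a boolean v is (xor v, 1); with
// ZeroOrNegativeOneBooleanContent (typical for vector compare results, where
// "true" is all-ones), it is (xor v, -1).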
SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
}
SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
}
SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
EVT VT, bool isT, bool isO) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
const ConstantInt *Elt = &Val;
// In some cases the vector type is legal but the element type is illegal and
// needs to be promoted, for example v8i8 on ARM. In this case, promote the
// inserted value (the type does not need to match the vector element type).
// Any extra bits introduced will be truncated away.
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
// example v2i64 on MIPS32. In this case, find the nearest legal type, split
// the value into n parts and use a vector type with n-times the elements.
// Then bitcast to the type requested.
// Legalizing constants too early makes the DAGCombiner's job harder so we
// only legalize if the DAG tells us we must produce legal types.
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
// Check the temporary vector is the correct size. If this fails then
// getTypeToTransformTo() probably returned a type whose size (in bits)
// isn't a power-of-2 factor of the requested type size.
assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
.zextOrTrunc(ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
}
// EltParts is currently in little endian order. If we actually want
// big-endian order then reverse it now.
if (getDataLayout().isBigEndian())
std::reverse(EltParts.begin(), EltParts.end());
// The elements must be reversed when the element order is different
// to the endianness of the elements (because the BITCAST is itself a
// vector shuffle in this situation). However, we do not need any code to
// perform this reversal because getConstant() is producing a vector
// splat.
// This situation occurs in MIPS MSA.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
ID.AddPointer(Elt);
ID.AddBoolean(isO);
void *IP = nullptr;
SDNode *N = nullptr;
if ((N = FindNodeOrInsertPos(ID, DL, IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
return Result;
}
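// Hypothetical walk-through of the expand path above: getConstant(K, DL,
// MVT::v2i64) on a 32-bit target that expands i64 builds the four i32 pieces
// <lo(K), hi(K), lo(K), hi(K)> as a v4i32 BUILD_VECTOR and bitcasts the
// result back to v2i64; on a big-endian layout each lo/hi pair is reversed
// first.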
SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
bool isTarget) {
return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
}
SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
}
SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
EVT VT, bool isTarget) {
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
EVT EltVT = VT.getScalarType();
// Do the map lookup using the actual bit pattern for the floating point
// value, so that we don't have problems with 0.0 comparing equal to -0.0, and
// we don't have issues with SNANs.
unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
if ((N = FindNodeOrInsertPos(ID, DL, IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
return Result;
}
SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
bool isTarget) {
EVT EltVT = VT.getScalarType();
if (EltVT == MVT::f32)
return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
else if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
EltVT == MVT::f16) {
bool Ignored;
APFloat APF = APFloat(Val);
APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&Ignored);
return getConstantFP(APF, DL, VT, isTarget);
} else
llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
EVT VT, int64_t Offset, bool isTargetGA,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
// Truncate (with sign-extension) the offset value to the pointer size.
unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
unsigned Opc;
if (GV->isThreadLocal())
Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
else
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
auto *N = newSDNode<GlobalAddressSDNode>(
Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(FI);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
Alignment = MF->getFunction()->optForSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
unsigned char TargetFlags) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
ID.AddInteger(Index);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
ID.AddPointer(MBB);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<BasicBlockSDNode>(MBB);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getValueType(EVT VT) {
if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
ValueTypeNodes.size())
ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
SDNode *&N = VT.isExtended() ?
ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
if (N) return SDValue(N, 0);
N = newSDNode<VTSDNode>(VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
SDNode *&N = MCSymbols[Sym];
if (N)
return SDValue(N, 0);
N = newSDNode<MCSymbolSDNode>(Sym, VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
unsigned char TargetFlags) {
SDNode *&N =
TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
TargetFlags)];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
if ((unsigned)Cond >= CondCodeNodes.size())
CondCodeNodes.resize(Cond+1);
if (!CondCodeNodes[Cond]) {
auto *N = newSDNode<CondCodeSDNode>(Cond);
CondCodeNodes[Cond] = N;
InsertNode(N);
}
return SDValue(CondCodeNodes[Cond], 0);
}
/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
/// point at N1 to point at N2 and indices that point at N2 to point at N1.
static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
std::swap(N1, N2);
ShuffleVectorSDNode::commuteMask(M);
}
SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
SDValue N2, ArrayRef<int> Mask) {
assert(VT.getVectorNumElements() == Mask.size() &&
"Must have the same number of vector elements as mask elements!");
assert(VT == N1.getValueType() && VT == N2.getValueType() &&
"Invalid VECTOR_SHUFFLE");
// Canonicalize shuffle undef, undef -> undef
if (N1.isUndef() && N2.isUndef())
return getUNDEF(VT);
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
N2 = getUNDEF(VT);
for (int i = 0; i != NElts; ++i)
if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N1.isUndef())
commuteShuffle(N1, N2, MaskVec);
// If shuffling a splat, try to blend the splat instead. We do this here so
// that even when this arises during lowering we don't have to re-handle it.
auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
BitVector UndefElements;
SDValue Splat = BV->getSplatValue(&UndefElements);
if (!Splat)
return;
for (int i = 0; i < NElts; ++i) {
if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
continue;
// If this input comes from undef, mark it as such.
if (UndefElements[MaskVec[i] - Offset]) {
MaskVec[i] = -1;
continue;
}
// If we can blend a non-undef lane, use that instead.
if (!UndefElements[i])
MaskVec[i] = i + Offset;
}
};
if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
BlendSplat(N1BV, 0);
if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
BlendSplat(N2BV, NElts);
// Canonicalize all indices into lhs -> shuffle lhs, undef
// Canonicalize all indices into rhs -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
bool N2Undef = N2.isUndef();
for (int i = 0; i != NElts; ++i) {
if (MaskVec[i] >= NElts) {
if (N2Undef)
MaskVec[i] = -1;
else
AllLHS = false;
} else if (MaskVec[i] >= 0) {
AllRHS = false;
}
}
if (AllLHS && AllRHS)
return getUNDEF(VT);
if (AllLHS && !N2Undef)
N2 = getUNDEF(VT);
if (AllRHS) {
N1 = getUNDEF(VT);
commuteShuffle(N1, N2, MaskVec);
}
// Reset our undef status after accounting for the mask.
N2Undef = N2.isUndef();
// Re-check whether both sides ended up undef.
if (N1.isUndef() && N2Undef)
return getUNDEF(VT);
// If this is an identity shuffle, return the input vector directly.
bool Identity = true, AllSame = true;
for (int i = 0; i != NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
return N1;
// Shuffling a constant splat doesn't change the result.
if (N2Undef) {
SDValue V = N1;
// Look through any bitcasts. We check that these don't change the number
// (and size) of elements and just change their types.
while (V.getOpcode() == ISD::BITCAST)
V = V->getOperand(0);
// A splat should always show up as a build vector node.
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
BitVector UndefElements;
SDValue Splat = BV->getSplatValue(&UndefElements);
// If this is a splat of an undef, shuffling it is also undef.
if (Splat && Splat.isUndef())
return getUNDEF(VT);
bool SameNumElts =
V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
// We only have a splat which can skip shuffles if there is a splatted
// value and no undef lanes rearranged by the shuffle.
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
if (C->isNullValue())
return N1;
}
// If the shuffle itself creates a splat, build the vector directly.
if (AllSame && SameNumElts) {
EVT BuildVT = BV->getValueType(0);
const SDValue &Splatted = BV->getOperand(MaskVec[0]);
SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (BuildVT != VT)
NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
return NewBV;
}
}
}
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
for (int i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
void* IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
// Allocate the mask array for the node out of the BumpPtrAllocator, since
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
dl.getDebugLoc(), MaskAlloc);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
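// Worked example of the canonicalizations above, with NElts == 4:
//   shuffle v, v,     <4,5,1,2>  ->  shuffle v, undef, <0,1,1,2>
//   shuffle undef, v, <4,5,6,7>  ->  shuffle v, undef, <0,1,2,3>
// and the second mask is the identity, so getVectorShuffle simply returns v.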
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
MVT VT = SV.getSimpleValueType(0);
SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
}
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
ID.AddInteger(RegNo);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
ID.AddPointer(RegMask);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
ID.AddPointer(Label);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
int64_t Offset,
bool isTarget,
unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
assert((!V || V->getType()->isPointerTy()) &&
"SrcValue is not a pointer?");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<SrcValueSDNode>(V);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
ID.AddPointer(MD);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
auto *N = newSDNode<MDNodeSDNode>(MD);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
if (VT == V.getValueType())
return V;
return getNode(ISD::BITCAST, SDLoc(V), VT, V);
}
SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS) {
SDValue Ops[] = {Ptr};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
ID.AddInteger(SrcAS);
ID.AddInteger(DestAS);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VT, SrcAS, DestAS);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
if (OpTy == ShTy || OpTy.isVector()) return Op;
return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
}
SDValue SelectionDAG::expandVAArg(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(Align - 1, dl, VAList.getValueType()));
VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
getConstant(-(int64_t)Align, dl, VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(getDataLayout().getTypeAllocSize(
VT.getTypeForEVT(*getContext())),
dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp1 =
getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
}
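// The over-alignment step above is the usual round-up idiom
//   aligned = (p + Align - 1) & -Align
// which works because -Align == ~(Align - 1) for a power-of-two Align;
// e.g. p = 0x1003, Align = 8 gives (0x1003 + 7) & ~7 = 0x1008.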
SDValue SelectionDAG::expandVACopy(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
// This defaults to loading a pointer from the input and storing it to the
// output, returning the chain.
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
SDValue Tmp1 =
getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS));
return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
unsigned Align =
std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
case ISD::SETFALSE2: return getConstant(0, dl, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
TargetLowering::BooleanContent Cnt =
TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
VT);
}
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETONE:
case ISD::SETO:
case ISD::SETUO:
case ISD::SETUEQ:
case ISD::SETUNE:
assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
break;
}
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
const APInt &C2 = N2C->getAPIntValue();
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
}
}
}
if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
switch (Cond) {
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
R==APFloat::cmpLessThan, dl, VT);
case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpUnordered, dl, VT);
case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
}
} else {
// Ensure that the constant occurs on the RHS.
ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
MVT CompVT = N1.getValueType().getSimpleVT();
if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
return SDValue();
return getSetCC(dl, VT, N2, N1, SwappedCond);
}
}
// Could not fold it.
return SDValue();
}
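// Example folds performed above: setcc 3, 5, setlt folds to getConstant(1)
// and setcc 3, 5, setgt to getConstant(0) in the requested result type,
// while an FP constant on the LHS against a non-constant RHS is only
// re-canonicalized (operands and condition swapped) when the swapped
// condition code is legal for the comparison type.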
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
KnownBits Known;
computeKnownBits(Op, Known, Depth);
return Mask.isSubsetOf(Known.Zero);
}
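// Usage sketch: proving (x & 0xFF00) == 0 for an i32 x reduces to
//   DAG.MaskedValueIsZero(X, APInt::getBitsSet(32, 8, 16))
// i.e. bits 8-15 must all be in Known.Zero; SignBitIsZero above is this
// same query specialized to APInt::getSignMask.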
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
if (ShAmt.ult(V.getScalarValueSizeInBits()))
return &ShAmt;
}
return nullptr;
}
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
computeKnownBits(Op, Known, DemandedElts, Depth);
}
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. The DemandedElts argument allows us to only collect the known
/// bits that are shared by the requested vector elements.
void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
Known = KnownBits(BitWidth); // Don't know anything.
if (Depth == 6)
return; // Limit search depth.
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
if (!DemandedElts)
return; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case ISD::Constant:
// We know all of the bits for a constant!
Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
Known.Zero = ~Known.One;
break;
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
assert(NumElts == Op.getValueType().getVectorNumElements() &&
"Unexpected vector size");
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
computeKnownBits(SrcOp, Known2, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, so we must handle this.
if (SrcOp.getValueSizeInBits() != BitWidth) {
assert(SrcOp.getValueSizeInBits() > BitWidth &&
"Expected BUILD_VECTOR implicit truncation");
Known2 = Known2.trunc(BitWidth);
}
// Known bits are the values that are shared by every demanded element.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
}
break;
case ISD::VECTOR_SHUFFLE: {
// Collect the known bits that are shared by every vector element referenced
// by the shuffle.
APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
Known.Zero.setAllBits(); Known.One.setAllBits();
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = SVN->getMaskElt(i);
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
Known.resetAll();
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
}
if ((unsigned)M < NumElts)
DemandedLHS.setBit((unsigned)M % NumElts);
else
DemandedRHS.setBit((unsigned)M % NumElts);
}
// Known bits are the values that are shared by every demanded element.
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
break;
}
case ISD::CONCAT_VECTORS: {
// Split DemandedElts and test each of the demanded subvectors.
Known.Zero.setAllBits(); Known.One.setAllBits();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
for (unsigned i = 0; i != NumSubVectors; ++i) {
APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
}
break;
}
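// Example: for (v8i32 concat_vectors A, B) with DemandedElts = 0b00010010,
// A is queried with 0b0010 (element 1) and B with 0b0001 (element 4, i.e.
// B's element 0), and the two KnownBits results are intersected.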
case ISD::EXTRACT_SUBVECTOR: {
// If we know the element index, just demand those subvector elements,
// otherwise demand them all.
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
} else {
computeKnownBits(Src, Known, Depth + 1);
}
break;
}
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
unsigned SubBitWidth = N0.getScalarValueSizeInBits();
// Ignore bitcasts from floating point.
if (!N0.getValueType().isInteger())
break;
// Fast handling of 'identity' bitcasts.
if (BitWidth == SubBitWidth) {
computeKnownBits(N0, Known, DemandedElts, Depth + 1);
break;
}
// Support big-endian targets when it becomes useful.
bool IsLE = getDataLayout().isLittleEndian();
if (!IsLE)
break;
// Bitcast 'small element' vector to 'large element' scalar/vector.
if ((BitWidth % SubBitWidth) == 0) {
assert(N0.getValueType().isVector() && "Expected bitcast from vector");
// Collect known bits for the (larger) output by collecting the known
// bits from each set of sub elements and shift these into place.
// We need to separately call computeKnownBits for each set of
// sub elements as the knownbits for each is likely to be different.
unsigned SubScale = BitWidth / SubBitWidth;
APInt SubDemandedElts(NumElts * SubScale, 0);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SubDemandedElts.setBit(i * SubScale);
for (unsigned i = 0; i != SubScale; ++i) {
computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
Depth + 1);
Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
}
}
// Bitcast 'large element' scalar/vector to 'small element' vector.
if ((SubBitWidth % BitWidth) == 0) {
assert(Op.getValueType().isVector() && "Expected bitcast to vector");
// Collect known bits for the (smaller) output by collecting the known
// bits from the overlapping larger input elements and extracting the
// sub sections we actually care about.
unsigned SubScale = SubBitWidth / BitWidth;
APInt SubDemandedElts(NumElts / SubScale, 0);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SubDemandedElts.setBit(i / SubScale);
computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % SubScale) * BitWidth;
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
}
}
break;
}
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
Known.Zero |= Known2.Zero;
break;
case ISD::OR:
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
Known.One |= Known2.One;
break;
case ISD::XOR: {
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
Known.Zero = KnownZeroOut;
break;
}
case ISD::MUL: {
computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
unsigned TrailZ = Known.countMinTrailingZeros() +
Known2.countMinTrailingZeros();
unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
Known.resetAll();
Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
break;
}
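// Worked example for the trailing-zero rule: a multiple of 4 (two known
// trailing zeros) times a multiple of 2 (one known trailing zero) is a
// multiple of 8, e.g. 4 * 6 = 24 = 0b11000, so three low bits of the product
// are known zero; this is exactly what pointer-alignment reasoning needs.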
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
unsigned LeadZ = Known2.countMinLeadingZeros();
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
if (RHSMaxLeadingZeros != BitWidth)
LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
Known.Zero.setHighBits(LeadZ);
break;
}
case ISD::SELECT:
computeKnownBits(Op.getOperand(2), Known, Depth+1);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
computeKnownBits(Op.getOperand(1), Known2, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
computeKnownBits(Op.getOperand(3), Known, Depth+1);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
computeKnownBits(Op.getOperand(2), Known2, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
// If we know the result of a setcc has the top bits zero, use this info.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.Zero <<= *ShAmt;
Known.One <<= *ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt->getZExtValue());
}
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.Zero.lshrInPlace(*ShAmt);
Known.One.lshrInPlace(*ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt->getZExtValue());
}
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.Zero.lshrInPlace(*ShAmt);
Known.One.lshrInPlace(*ShAmt);
// If we know the value of the sign bit, then we know it is copied across
// the high bits by the shift amount.
APInt SignMask = APInt::getSignMask(BitWidth);
SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask.
if (Known.Zero.intersects(SignMask)) {
Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
} else if (Known.One.intersects(SignMask)) {
Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
}
}
break;
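// Example for the SRA rule: an i8 value 0b1xxxxxxx with a known-set sign bit
// shifted right by 3 arithmetically becomes 0b1111xxxx, so setHighBits(3)
// records the replicated sign bits in Known.One; with a known-clear sign bit
// the same bits land in Known.Zero instead.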
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarSizeInBits();
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
APInt InSignMask = APInt::getSignMask(EBits);
APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
InSignMask = InSignMask.zext(BitWidth);
if (NewBits.getBoolValue())
InputDemandedBits |= InSignMask;
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known.One &= InputDemandedBits;
Known.Zero &= InputDemandedBits;
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear
Known.Zero |= NewBits;
Known.One &= ~NewBits;
} else if (Known.One.intersects(InSignMask)) { // Input sign bit known set
Known.One |= NewBits;
Known.Zero &= ~NewBits;
} else { // Input sign bit unknown
Known.Zero &= ~NewBits;
Known.One &= ~NewBits;
}
break;
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleTZ = Known2.countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
unsigned PossibleLZ = Known2.countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case ISD::CTPOP: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
unsigned PossibleOnes = Known2.countMaxPopulation();
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
break;
}
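// Example for CTPOP: if at most 5 bits of the input can be set
// (countMaxPopulation() == 5), the result fits in Log2_32(5) + 1 == 3 bits,
// so bits 3 and up of the result are known zero.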
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
// If this is a ZEXTLoad and we are looking at the loaded value.
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
computeKnownBitsFromRangeMetadata(*Ranges, Known);
}
break;
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known,
DemandedElts.zext(InVT.getVectorNumElements()),
Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBits);
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBits);
break;
}
// TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
Known = Known.zext(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
computeKnownBits(Op.getOperand(0), Known, Depth+1);
Known.Zero |= (~InMask);
Known.One &= (~Known.Zero);
break;
}
case ISD::FGETSIGN:
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
break;
case ISD::USUBO:
case ISD::SSUBO:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::SUBC: {
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
// We know that the top bits of C-X are clear if X contains fewer bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
if (CLHS->getAPIntValue().isNonNegative()) {
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth: C is non-negative, so C+1 is non-zero.
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
// from [0-C].
if ((Known2.Zero & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
Known.Zero.setHighBits(NLZ2);
}
}
}
// If low bits are known to be zero in both operands, then we know they are
// going to be 0 in the result. Both addition and complement operations
// preserve the low zero bits.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
if (KnownZeroLow == 0)
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
Known.Zero.setLowBits(KnownZeroLow);
break;
}
case ISD::UADDO:
case ISD::SADDO:
case ISD::ADDCARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
// The low bits of the output are known zero up to the number of low zero
// bits common to both LHS and RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
// Output known-0 bits are also known if the top bits of each input are
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
unsigned KnownZeroLow = Known2.countMinTrailingZeros();
computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
// With ADDE and ADDCARRY, a carry bit may be added in, so we can only
// use this information if we know (at least) that the low two bits are
// clear. We then return to the caller that the low bit is unknown but
// that other bits are known zero.
if (KnownZeroLow >= 2)
Known.Zero.setBits(1, KnownZeroLow);
break;
}
Known.Zero.setLowBits(KnownZeroLow);
if (KnownZeroHigh > 1)
Known.Zero.setHighBits(KnownZeroHigh - 1);
break;
}
case ISD::SREM:
if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
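// For example, srem by 8 gives LowBits = 7: the low 3 bits of the result
// are copied straight from the first operand.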
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// The low bits of the first operand are unchanged by the srem.
Known.Zero = Known2.Zero & LowBits;
Known.One = Known2.One & LowBits;
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
Known.One |= ~LowBits;
assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?");
}
}
break;
case ISD::UREM: {
if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
Known.Zero = Known2.Zero | ~LowBits;
Known.One = Known2.One & LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
uint32_t Leaders =
std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
Known.resetAll();
Known.Zero.setHighBits(Leaders);
break;
}
case ISD::EXTRACT_ELEMENT: {
computeKnownBits(Op.getOperand(0), Known, Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
const unsigned BitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
// Remove high part of known bit mask
Known = Known.trunc(BitWidth);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is any-extended. So we do not know
// anything about the extended bits.
if (BitWidth > EltBitWidth)
Known = Known.trunc(EltBitWidth);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
// If we know the element index, just demand that vector element.
unsigned Idx = ConstEltNo->getZExtValue();
APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
computeKnownBits(InVec, Known, Depth + 1);
}
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth);
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
// If we know the element index, split the demand between the
// source vector and the inserted element.
Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then add its common known bits.
if (DemandedElts[EltIdx]) {
computeKnownBits(InVal, Known2, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
// If we demand the source vector then add its common known bits, ensuring
// that we don't demand the inserted element.
APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
if (!!VectorElts) {
computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
computeKnownBits(InVec, Known, Depth + 1);
computeKnownBits(InVal, Known2, Depth + 1);
Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
case ISD::BITREVERSE: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.reverseBits();
Known.One = Known2.One.reverseBits();
break;
}
case ISD::BSWAP: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
Known.Zero = Known2.Zero.byteSwap();
Known.One = Known2.One.byteSwap();
break;
}
case ISD::ABS: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If the source's MSB is zero then we know the rest of the bits already.
if (Known2.isNonNegative()) {
Known.Zero = Known2.Zero;
Known.One = Known2.One;
break;
}
// We only know that the absolute value's MSB will be zero iff there is
// a set bit that isn't the sign bit (otherwise it could be INT_MIN).
Known2.One.clearSignBit();
if (Known2.One.getBoolValue()) {
Known.Zero = APInt::getSignMask(BitWidth);
break;
}
break;
}
case ISD::UMIN: {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
unsigned LeadZero = Known.countMinLeadingZeros();
LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
Known.Zero.setHighBits(LeadZero);
break;
}
case ISD::UMAX: {
computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
unsigned LeadOne = Known.countMinLeadingOnes();
LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
Known.One.setHighBits(LeadOne);
break;
}
case ISD::SMIN:
case ISD::SMAX: {
computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
// If we don't know any bits, early out.
if (!Known.One && !Known.Zero)
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
Known.Zero.setLowBits(Log2_32(Align));
break;
}
break;
default:
if (Opcode < ISD::BUILTIN_OP_END)
break;
LLVM_FALLTHROUGH;
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
break;
}
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
SDValue N1) const {
// X + 0 never overflows.
if (isNullConstant(N1))
return OFK_Never;
KnownBits N1Known;
computeKnownBits(N1, N1Known);
if (N1Known.Zero.getBoolValue()) {
KnownBits N0Known;
computeKnownBits(N0, N0Known);
bool overflow;
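// ~Known.Zero is an upper bound on each operand; if even the two upper
// bounds cannot wrap when added, the addition can never overflow.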
(void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
if (!overflow)
return OFK_Never;
}
// mulhi + (0 or 1) never overflows.
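// The test below checks that every possibly-set bit of N1 is bit 0, i.e.
// N1 is known to be 0 or 1. The high half of an unsigned full multiply is
// at most 2^BitWidth - 2, so adding 1 cannot wrap.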
if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
(~N1Known.Zero & 0x01) == ~N1Known.Zero)
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
KnownBits N0Known;
computeKnownBits(N0, N0Known);
if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
return OFK_Never;
}
return OFK_Sometime;
}
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarSizeInBits();
// Is the constant a known power of 2?
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
if (Val.getOpcode() == ISD::SHL) {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
}
// Similarly, a logical right-shift of a constant sign-bit will have exactly
// one bit set.
if (Val.getOpcode() == ISD::SRL) {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue().isSignMask())
return true;
}
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
return false;
}))
return true;
// More could be done here, though the above checks are enough
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
KnownBits Known;
computeKnownBits(Val, Known);
return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth) const {
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
if (Depth == 6)
return 1; // Limit search depth.
if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
switch (Op.getOpcode()) {
default: break;
case ISD::AssertSext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp+1;
case ISD::AssertZext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp;
case ISD::Constant: {
const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
return Val.getNumSignBits();
}
case ISD::BUILD_VECTOR:
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != VTBits) {
assert(SrcOp.getValueSizeInBits() > VTBits &&
"Expected BUILD_VECTOR implicit truncation");
unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
}
Tmp = std::min(Tmp, Tmp2);
}
return Tmp;
case ISD::VECTOR_SHUFFLE: {
// Collect the minimum number of sign bits that are shared by every vector
// element referenced by the shuffle.
APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
for (unsigned i = 0; i != NumElts; ++i) {
int M = SVN->getMaskElt(i);
if (!DemandedElts[i])
continue;
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
if (M < 0)
return 1;
if ((unsigned)M < NumElts)
DemandedLHS.setBit((unsigned)M % NumElts);
else
DemandedRHS.setBit((unsigned)M % NumElts);
}
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedLHS)
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
if (!!DemandedRHS) {
Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
// If we don't know anything, early out and fall back to computeKnownBits.
if (Tmp == 1)
break;
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
Tmp = VTBits-Tmp+1;
Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
return std::max(Tmp, Tmp2);
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt ShiftVal = C->getAPIntValue();
ShiftVal += Tmp;
Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
return Tmp;
case ISD::SHL:
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (C->getAPIntValue().uge(VTBits) || // Bad shift.
C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
return Tmp - C->getZExtValue();
}
break;
case ISD::AND:
case ISD::OR:
case ISD::XOR: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp != 1) {
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
// computeKnownBits, and pick whichever answer is better.
}
break;
case ISD::SELECT:
Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SELECT_CC:
Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
if (Tmp == 1)
return 1; // Early out.
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents. Fall through.
// If setcc returns 0/-1, all bits are sign bits.
// We know that we have an integer-based boolean since these operations
// are only available for integer.
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
case ISD::SETCC:
// If setcc returns 0/-1, all bits are sign bits.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned RotAmt = C->getZExtValue() & (VTBits-1);
// Handle rotate right by N like a rotate left by VTBits-N.
if (Op.getOpcode() == ISD::ROTR)
RotAmt = (VTBits-RotAmt) & (VTBits-1);
// If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example.
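// For instance, rotl (sext i8 to i32), 1: the input has at least 25 sign
// bits, so at least 24 survive the rotate.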
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp > RotAmt+1) return Tmp-RotAmt;
}
break;
case ISD::ADD:
case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
KnownBits Known;
computeKnownBits(Op.getOperand(0), Known, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
// out of the result.
if (Known.isNonNegative())
return Tmp;
}
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
// Handle NEG.
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
KnownBits Known;
computeKnownBits(Op.getOperand(1), Known, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
if (Known.isNonNegative())
return Tmp2;
// Otherwise, we treat this like a SUB.
}
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE: {
// Check if the sign bits of source go down as far as the truncated value.
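// For example, truncating i64 -> i32 when the source has 40 sign bits
// leaves 40 - (64 - 32) = 8 sign bits in the result.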
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
if (NumSrcSignBits > (NumSrcBits - VTBits))
return NumSrcSignBits - (NumSrcBits - VTBits);
break;
}
case ISD::EXTRACT_ELEMENT: {
const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
const int BitWidth = Op.getValueSizeInBits();
const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
// Get the reverse index (counting from the big end): operand 1 indexes
// elements from the little end, but the sign bits sit at the big end.
const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
// If the sign portion extends into our element, the subtraction gives the
// correct result; otherwise it is negative or exceeds the bit width, so
// clamp the answer to [0, BitWidth].
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
case ISD::INSERT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
unsigned NumElts = InVec.getValueType().getVectorNumElements();
ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
// If we know the element index, split the demand between the
// source vector and the inserted element.
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then get its sign bits.
Tmp = std::numeric_limits<unsigned>::max();
if (DemandedElts[EltIdx]) {
// TODO - handle implicit truncation of inserted elements.
if (InVal.getScalarValueSizeInBits() != VTBits)
break;
Tmp = ComputeNumSignBits(InVal, Depth + 1);
}
// If we demand the source vector then get its sign bits, and determine
// the minimum.
APInt VectorElts = DemandedElts;
VectorElts.clearBit(EltIdx);
if (!!VectorElts) {
Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
Tmp = ComputeNumSignBits(InVec, Depth + 1);
Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is any-extended, and we do not know
// anything about sign bits. But if the sizes match we can derive knowledge
// about sign bits from the vector operand.
if (BitWidth != EltBitWidth)
break;
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR: {
// If we know the element index, just demand that subvector elements,
// otherwise demand them all.
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
}
return ComputeNumSignBits(Src, Depth + 1);
}
case ISD::CONCAT_VECTORS:
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
Tmp = std::numeric_limits<unsigned>::max();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!DemandedSub)
continue;
Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
// If we are looking at the loaded value of the SDNode.
if (Op.getResNo() == 0) {
// Handle LOADX separately here. EXTLOAD case will fall through.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
unsigned ExtType = LD->getExtensionType();
switch (ExtType) {
default: break;
case ISD::SEXTLOAD: // e.g. an i16 sextload into i32 yields 17 sign bits.
Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp+1;
case ISD::ZEXTLOAD: // e.g. an i16 zextload into i32 yields 16 sign bits.
Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp;
}
}
}
// Allow the target to implement this method for its nodes.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
if (NumBits > 1)
FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
KnownBits Known;
computeKnownBits(Op, Known, DemandedElts, Depth);
APInt Mask;
if (Known.isNonNegative()) { // sign bit is 0
Mask = Known.Zero;
} else if (Known.isNegative()) { // sign bit is 1;
Mask = Known.One;
} else {
// Nothing known.
return FirstAnswer;
}
// Okay, we know that the sign bit in Mask is set. Use CLZ to determine
// the number of identical bits in the top of the input value.
Mask = ~Mask;
Mask <<= Mask.getBitWidth()-VTBits;
// Return # leading zeros. We use 'min' here in case Val was zero before
// shifting. We don't want to return '64' as for an i32 "0".
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return false;
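// An OR only behaves like an ADD of a base and an offset when the constant
// has no bits in common with the possibly-set bits of the other operand.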
if (Op.getOpcode() == ISD::OR &&
!MaskedValueIsZero(Op.getOperand(0),
cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
return false;
return true;
}
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath)
return true;
if (Op->getFlags().hasNoNaNs())
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->getValueAPF().isNaN();
// TODO: Recognize more cases here.
return false;
}
bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
// If the value is a constant, we can obviously see if it is a zero or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
// TODO: Recognize more cases here.
switch (Op.getOpcode()) {
default: break;
case ISD::OR:
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return !C->isNullValue();
break;
}
return false;
}
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
// Check for negative and positive zero.
if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
if (CA->isZero() && CB->isZero()) return true;
// Otherwise they may not be equal.
return false;
}
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
KnownBits AKnown, BKnown;
computeKnownBits(A, AKnown);
computeKnownBits(B, BKnown);
return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
}
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
assert(llvm::all_of(Ops,
[Ops](SDValue Op) {
return Ops[0].getValueType() == Op.getValueType();
}) &&
"Concatenation of vectors with inconsistent value types!");
assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
VT.getVectorNumElements() &&
"Incorrect element count in vector concatenation!");
if (Ops.size() == 1)
return Ops[0];
// Concat of UNDEFs is UNDEF.
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
// A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
// simplified to one big BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
for (SDValue Op : Ops) {
EVT OpVT = Op.getValueType();
if (Op.isUndef())
Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
else if (Op.getOpcode() == ISD::BUILD_VECTOR)
Elts.append(Op->op_begin(), Op->op_end());
else
return SDValue();
}
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT.bitsGT(VT.getScalarType()))
for (SDValue &Op : Elts)
Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
return DAG.getBuildVector(VT, DL, Elts);
}
/// Gets or creates the specified node.
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, getVTList(VT), None);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
getVTList(VT));
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue Operand, const SDNodeFlags Flags) {
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
// opaque flag is preserved during folding to prevent future folding with
// other constants.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
const APInt &Val = C->getAPIntValue();
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
return getConstantFP(apf, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
case ISD::ABS:
return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::BITREVERSE:
return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTPOP:
return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::FP16_TO_FP: {
bool Ignored;
APFloat FPV(APFloat::IEEEhalf(),
(Val.getBitWidth() == 16) ? Val : Val.trunc(16));
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)FPV.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &Ignored);
return getConstantFP(FPV, DL, VT);
}
}
}
// Constant fold unary operations with a floating point constant operand.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
APFloat V = C->getValueAPF(); // make copy
switch (Opcode) {
case ISD::FNEG:
V.changeSign();
return getConstantFP(V, DL, VT);
case ISD::FABS:
V.clearSign();
return getConstantFP(V, DL, VT);
case ISD::FCEIL: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, DL, VT);
break;
}
case ISD::FTRUNC: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, DL, VT);
break;
}
case ISD::FFLOOR: {
APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, DL, VT);
break;
}
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, DL, VT);
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool ignored;
APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
// FIXME need to be more flexible about rounding mode.
APFloat::opStatus s =
V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
return getConstant(IntVal, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
case ISD::FP_TO_FP16: {
bool Ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)V.convert(APFloat::IEEEhalf(),
APFloat::rmNearestTiesToEven, &Ignored);
return getConstant(V.bitcastToAPInt(), DL, VT);
}
}
}
// Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
if (BV->isConstant()) {
switch (Opcode) {
default:
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
case ISD::FNEG:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = { Operand };
if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
}
}
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
return Operand; // Factor, merge or concat of one node? No need.
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
assert(VT.isFloatingPoint() &&
Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
if (Operand.getValueType() == VT) return Operand; // noop conversion.
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid fpext node, dst < src!");
if (Operand.isUndef())
return getUNDEF(VT);
break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
break;
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = Operand.getOperand(0);
if (OpOp.getValueType() == VT)
return OpOp;
}
break;
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsGT(VT) &&
"Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
if (Operand.getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
return Operand.getOperand(0);
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::ABS:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BSWAP!");
assert((VT.getScalarSizeInBits() % 16 == 0) &&
"BSWAP types must be a multiple of 16 bits!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BITREVERSE:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BITREVERSE!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::SCALAR_TO_VECTOR:
assert(VT.isVector() && !Operand.getValueType().isVector() &&
(VT.getVectorElementType() == Operand.getValueType() ||
(VT.getVectorElementType().isInteger() &&
Operand.getValueType().isInteger() &&
VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
"Illegal SCALAR_TO_VECTOR node!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(Operand.getOperand(1)) &&
Operand.getConstantOperandVal(1) == 0 &&
Operand.getOperand(0).getValueType() == VT)
return Operand.getOperand(0);
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
// FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
Operand.getOperand(0), Operand.getNode()->getFlags());
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
case ISD::FABS:
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;
}
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {Operand};
if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
E->intersectFlagsWith(Flags);
return SDValue(E, 0);
}
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
const APInt &C2) {
switch (Opcode) {
case ISD::ADD: return std::make_pair(C1 + C2, true);
case ISD::SUB: return std::make_pair(C1 - C2, true);
case ISD::MUL: return std::make_pair(C1 * C2, true);
case ISD::AND: return std::make_pair(C1 & C2, true);
case ISD::OR: return std::make_pair(C1 | C2, true);
case ISD::XOR: return std::make_pair(C1 ^ C2, true);
case ISD::SHL: return std::make_pair(C1 << C2, true);
case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.udiv(C2), true);
case ISD::UREM:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.urem(C2), true);
case ISD::SDIV:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.sdiv(C2), true);
case ISD::SREM:
if (!C2.getBoolValue())
break;
return std::make_pair(C1.srem(C2), true);
}
return std::make_pair(APInt(1, 0), false);
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, const ConstantSDNode *Cst1,
const ConstantSDNode *Cst2) {
if (Cst1->isOpaque() || Cst2->isOpaque())
return SDValue();
std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
Cst2->getAPIntValue());
if (!Folded.second)
return SDValue();
return getConstant(Folded.first, DL, VT);
}
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const GlobalAddressSDNode *GA,
const SDNode *N2) {
if (GA->getOpcode() != ISD::GlobalAddress)
return SDValue();
if (!TLI->isOffsetFoldingLegal(GA))
return SDValue();
const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
if (!Cst2)
return SDValue();
int64_t Offset = Cst2->getSExtValue();
switch (Opcode) {
case ISD::ADD: break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
GA->getOffset() + uint64_t(Offset));
}
bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
switch (Opcode) {
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: {
// If a divisor is zero/undef or any element of a divisor vector is
// zero/undef, the whole op is undef.
assert(Ops.size() == 2 && "Div/rem should have 2 operands");
SDValue Divisor = Ops[1];
if (Divisor.isUndef() || isNullConstant(Divisor))
return true;
return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
llvm::any_of(Divisor->op_values(),
[](SDValue V) { return V.isUndef() ||
isNullConstant(V); });
// TODO: Handle signed overflow.
}
// TODO: Handle oversized shifts.
default:
return false;
}
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, SDNode *Cst1,
SDNode *Cst2) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
return getUNDEF(VT);
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
}
}
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
return FoldSymbolOffset(Opcode, VT, GA, Cst2);
if (TLI->isCommutativeBinOp(Opcode))
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
return FoldSymbolOffset(Opcode, VT, GA, Cst1);
// For vectors extract each constant element into Inputs so we can constant
// fold them individually.
BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
if (!BV1 || !BV2)
return SDValue();
assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 4> Outputs;
for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
SDValue V1 = BV1->getOperand(I);
SDValue V2 = BV2->getOperand(I);
// Avoid BUILD_VECTOR nodes that perform implicit truncation.
// FIXME: This is valid and could be handled by truncation.
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
return SDValue();
// Fold one vector element.
SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
Outputs.push_back(ScalarResult);
}
assert(VT.getVectorNumElements() == Outputs.size() &&
"Vector size mismatch!");
// We may have a vector type but a scalar result. Create a splat.
Outputs.resize(VT.getVectorNumElements(), Outputs.back());
// Build a big vector out of the scalar elements we generated.
return getBuildVector(VT, SDLoc(), Outputs);
}
SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
// We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
if (!VT.isVector())
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorNumElements() == NumElts;
};
auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
(BV && BV->isConstant());
};
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be an i1 boolean
// that is then sign-extended back to the legal result type.
EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
EVT LegalSVT = VT.getScalarType();
if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
if (LegalSVT.bitsLT(VT.getScalarType()))
return SDValue();
}
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
for (unsigned i = 0; i != NumElts; i++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
if (!InBV) {
// We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
else
ScalarOps.push_back(Op);
continue;
}
SDValue ScalarOp = InBV->getOperand(i);
EVT ScalarVT = ScalarOp.getValueType();
// Build vector (integer) scalar operands may need implicit
// truncation - do this before constant folding.
if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
ScalarOps.push_back(ScalarOp);
}
// Constant fold the scalar operands.
SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
ScalarResults.push_back(ScalarResult);
}
return getBuildVector(VT, DL, ScalarResults);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
// Canonicalize constant to RHS if commutative.
if (TLI->isCommutativeBinOp(Opcode)) {
if (N1C && !N2C) {
std::swap(N1C, N2C);
std::swap(N1, N2);
} else if (N1CFP && !N2CFP) {
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
}
}
switch (Opcode) {
default: break;
case ISD::TokenFactor:
assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
N2.getValueType() == MVT::Other && "Invalid token factor!");
// Fold trivial token factors.
if (N1.getOpcode() == ISD::EntryToken) return N2;
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::AND:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
if (N2C && N2C->isNullValue())
return N2;
if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
return N1;
break;
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
if (N2C && N2C->isNullValue())
return N1;
break;
case ISD::UDIV:
case ISD::UREM:
case ISD::MULHU:
case ISD::MULHS:
case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// x+0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FSUB) {
// x-0 --> x
if (N2CFP && N2CFP->getValueAPF().isZero())
return N1;
} else if (Opcode == ISD::FMUL) {
// x*0 --> 0
if (N2CFP && N2CFP->isZero())
return N2;
// x*1 --> x
if (N2CFP && N2CFP->isExactlyValue(1.0))
return N1;
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
assert(N1.getValueType() == VT &&
N1.getValueType().isFloatingPoint() &&
N2.getValueType().isFloatingPoint() &&
"Invalid FCOPYSIGN!");
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
assert(VT == N1.getValueType() &&
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
assert((!VT.isVector() || VT == N2.getValueType()) &&
"Vector shift amounts must be in the same as their first arg");
// Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmountTy().
assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
// handle them. Since we know the size of the shift has to be less than the
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
if (N2C && N2C->isNullValue())
return N1;
break;
case ISD::FP_ROUND_INREG: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg round!");
assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
"Cannot FP_ROUND_INREG integer types");
assert(EVT.isVector() == VT.isVector() &&
"FP_ROUND_INREG type should be vector iff the operand "
"type is vector!");
assert((!EVT.isVector() ||
EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
"Vector element counts must match in FP_ROUND_INREG");
assert(EVT.bitsLE(VT) && "Not rounding down!");
(void)EVT;
if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
break;
}
case ISD::FP_ROUND:
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
"Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
case ISD::AssertZext: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
assert(!EVT.isVector() &&
"AssertSExt/AssertZExt type should be the vector element type "
"rather than the vector type!");
assert(EVT.bitsLE(VT) && "Not extending!");
if (VT == EVT) return N1; // noop assertion.
break;
}
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(N2)->getVT();
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
assert(EVT.isVector() == VT.isVector() &&
"SIGN_EXTEND_INREG type should be vector iff the operand "
"type is vector!");
assert((!EVT.isVector() ||
EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
"Vector element counts must match in SIGN_EXTEND_INREG");
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
Val.ashrInPlace(Val.getBitWidth() - FromBits);
return getConstant(Val, DL, ConstantVT);
};
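// Worked example (illustrative): sign-extending the low 4 bits of the i8
// value 0x0A: the shift left by 4 gives 0xA0, and the arithmetic shift
// right by 4 gives 0xFA, i.e. -6, matching sign-extension of 0b1010.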
if (N1C) {
const APInt &Val = N1C->getAPIntValue();
return SignExtendInReg(Val, VT);
}
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
llvm::EVT OpVT = N1.getOperand(0).getValueType();
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
if (Op.isUndef()) {
Ops.push_back(getUNDEF(OpVT));
continue;
}
ConstantSDNode *C = cast<ConstantSDNode>(Op);
APInt Val = C->getAPIntValue();
Ops.push_back(SignExtendInReg(Val, OpVT));
}
return getBuildVector(VT, DL, Ops);
}
break;
}
case ISD::EXTRACT_VECTOR_ELT:
// EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering
// expands copies of large vectors from registers.
if (N2C &&
N1.getOpcode() == ISD::CONCAT_VECTORS &&
N1.getNumOperands() > 0) {
unsigned Factor =
N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
N1.getOperand(N2C->getZExtValue() / Factor),
getConstant(N2C->getZExtValue() % Factor, DL,
N2.getValueType()));
}
// EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering
// expands large vector constants.
if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt = N1.getOperand(N2C->getZExtValue());
if (VT != Elt.getValueType())
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated, and the result implicitly
// extended. Make that explicit here.
Elt = getAnyExtOrTrunc(Elt, DL, VT);
return Elt;
}
// EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
// operations are lowered to scalars.
if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
// If the indices are the same, return the inserted element else
// if the indices are known different, extract the element from
// the original vector.
SDValue N1Op2 = N1.getOperand(2);
ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
if (N1Op2C && N2C) {
if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
if (VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
else
return getSExtOrTrunc(N1.getOperand(1), DL, VT);
}
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
}
}
break;
case ISD::EXTRACT_ELEMENT:
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
assert(!N1.getValueType().isVector() && !VT.isVector() &&
(N1.getValueType().isInteger() == VT.isInteger()) &&
N1.getValueType() != VT &&
"Wrong types for EXTRACT_ELEMENT!");
// EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalization expands
// 64-bit integers into 32-bit parts. Instead of building the extract of the
// BUILD_PAIR, only to have legalization rip it apart, just do it now.
if (N1.getOpcode() == ISD::BUILD_PAIR)
return N1.getOperand(N2C->getZExtValue());
// EXTRACT_ELEMENT of a constant int is also very common.
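// For example (illustrative), extracting element 1 of the i64 constant
// 0x1122334455667788 as an i32 yields 0x11223344; element 0 is the low half.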
if (N1C) {
unsigned ElementSize = VT.getSizeInBits();
unsigned Shift = ElementSize * N2C->getZExtValue();
APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
}
break;
case ISD::EXTRACT_SUBVECTOR:
if (VT.isSimple() && N1.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
"Extract subvector VTs must be a vectors!");
assert(VT.getVectorElementType() ==
N1.getValueType().getVectorElementType() &&
"Extract subvector VTs must have the same element type!");
assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
"Extract subvector must be from larger vector to smaller vector!");
if (N2C) {
assert((VT.getVectorNumElements() + N2C->getZExtValue()
<= N1.getValueType().getVectorNumElements())
&& "Extract subvector overflow!");
}
// Trivial extraction.
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
// EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
// the concat have the same type as the extract.
if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
N1.getNumOperands() > 0 &&
VT == N1.getOperand(0).getValueType()) {
unsigned Factor = VT.getVectorNumElements();
return N1.getOperand(N2C->getZExtValue() / Factor);
}
// EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
// during shuffle legalization.
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
}
break;
}
// Perform trivial constant folding.
if (SDValue SV =
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
// Constant fold FP operations.
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
if (N1CFP) {
if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
case ISD::FADD:
s = V1.add(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s != APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
break;
case ISD::FSUB:
s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
break;
case ISD::FMUL:
s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || s!=APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
break;
case ISD::FDIV:
s = V1.divide(V2, APFloat::rmNearestTiesToEven);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
}
break;
case ISD::FREM :
s = V1.mod(V2);
if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
s!=APFloat::opDivByZero)) {
return getConstantFP(V1, DL, VT);
}
break;
case ISD::FCOPYSIGN:
V1.copySign(V2);
return getConstantFP(V1, DL, VT);
default: break;
}
}
if (Opcode == ISD::FP_ROUND) {
APFloat V = N1CFP->getValueAPF(); // make copy
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
(void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, DL, VT);
}
}
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
std::swap(N1, N2);
} else {
switch (Opcode) {
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
case ISD::FSUB:
case ISD::FDIV:
case ISD::FREM:
case ISD::SRA:
return N1; // fold op(undef, arg2) -> undef
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
case ISD::SRL:
case ISD::SHL:
if (!VT.isVector())
return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
// For vectors, we can't easily build an all-zero vector; just return
// the other operand.
return N2;
}
}
}
// Fold a bunch of operators when the RHS is undef.
if (N2.isUndef()) {
switch (Opcode) {
case ISD::XOR:
if (N1.isUndef())
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUB:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
return N2; // fold op(arg1, undef) -> undef
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
case ISD::AND:
case ISD::SRL:
case ISD::SHL:
if (!VT.isVector())
return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
// For vectors, we can't easily build an all-zero vector; just return
// the LHS.
return N1;
case ISD::OR:
if (!VT.isVector())
return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
// For vectors, we can't easily build an all-ones vector; just return
// the LHS.
return N1;
case ISD::SRA:
return N1;
}
}
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2};
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
E->intersectFlagsWith(Flags);
return SDValue(E, 0);
}
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
if (N1CFP && N2CFP && N3CFP) {
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
APFloat::opStatus s =
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
return getConstantFP(V1, DL, VT);
}
break;
}
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
return V;
break;
}
case ISD::SELECT:
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
if (N1C->getZExtValue())
return N2; // select true, X, Y -> X
return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
break;
}
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
&& N2.getValueType().isSimple()) {
assert(VT.isVector() && N1.getValueType().isVector() &&
N2.getValueType().isVector() &&
"Insert subvector VTs must be a vectors");
assert(VT == N1.getValueType() &&
"Dest and insert subvector source types must match!");
assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
"Insert subvector must be from smaller vector to larger vector!");
if (isa<ConstantSDNode>(Index)) {
assert((N2.getValueType().getVectorNumElements() +
cast<ConstantSDNode>(Index)->getZExtValue()
<= VT.getVectorNumElements())
&& "Insert subvector overflow!");
}
// Trivial insertion.
if (VT.getSimpleVT() == N2.getSimpleValueType())
return N2;
}
break;
}
case ISD::BITCAST:
// Fold bit_convert nodes from a type to themselves.
if (N1.getValueType() == VT)
return N1;
break;
}
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2, N3};
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VT, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VT, Ops);
}
/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
/// the incoming stack arguments to be loaded from the stack.
SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
SmallVector<SDValue, 8> ArgChains;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack argument.
for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0)
ArgChains.push_back(SDValue(L, 1));
// Build a tokenfactor for all the chains.
return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
const SDLoc &dl) {
assert(!Value.isUndef());
unsigned NumBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
return DAG.getConstant(Val, dl, VT);
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
VT);
}
assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
EVT IntVT = VT.getScalarType();
if (!IntVT.isInteger())
IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
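// For example (illustrative), with NumBits == 32 the magic constant is
// 0x01010101, so a fill byte of 0x42 becomes 0x42 * 0x01010101 == 0x42424242.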
APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
DAG.getConstant(Magic, dl, IntVT));
}
if (VT != Value.getValueType() && !VT.isInteger())
Value = DAG.getBitcast(VT.getScalarType(), Value);
if (VT != Value.getValueType())
Value = DAG.getSplatBuildVector(VT, dl, Value);
return Value;
}
/// getMemsetStringVal - Similar to getMemsetValue, except this is only
/// used when a memcpy is turned into a memset because the source is a
/// constant string pointer.
static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
const TargetLowering &TLI,
const ConstantDataArraySlice &Slice) {
// Handle vector with all elements zero.
if (Slice.Array == nullptr) {
if (VT.isInteger())
return DAG.getConstant(0, dl, VT);
else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
return DAG.getConstantFP(0.0, dl, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, dl,
EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
} else
llvm_unreachable("Expected type!");
}
assert(!VT.isVector() && "Can't handle vector type here!");
unsigned NumVTBits = VT.getSizeInBits();
unsigned NumVTBytes = NumVTBits / 8;
unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length));
APInt Val(NumVTBits, 0);
if (DAG.getDataLayout().isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Slice[i] << i*8;
} else {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8;
}
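// Illustrative example: packing the four bytes "abcd" into an i32 yields
// 0x64636261 on little-endian targets and 0x61626364 on big-endian ones.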
// If the "cost" of materializing the integer immediate is less than the cost
// of a load, then it is cost effective to turn the load into the immediate.
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
return DAG.getConstant(Val, dl, VT);
return SDValue(nullptr, 0);
}
SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
const SDLoc &DL) {
EVT VT = Base.getValueType();
return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
}
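// Usage sketch (illustrative): getMemBasePlusOffset(Ptr, 8, DL) simply
// builds (add Ptr, 8) in the pointer's value type; the memcpy / memmove /
// memset expansions below use it to step through memory one chunk at a time.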
/// Returns true if memcpy source is constant data.
static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
uint64_t SrcDelta = 0;
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
G = cast<GlobalAddressSDNode>(Src);
else if (Src.getOpcode() == ISD::ADD &&
Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
Src.getOperand(1).getOpcode() == ISD::Constant) {
G = cast<GlobalAddressSDNode>(Src.getOperand(0));
SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
}
if (!G)
return false;
return getConstantDataArrayInfo(G->getGlobal(), Slice, 8,
SrcDelta + G->getOffset());
}
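// Illustrative example: a source of the form (add GlobalAddress:@str, 5)
// is recognized here with SrcDelta == 5, letting the memcpy expansion read
// the constant data of @str directly instead of emitting loads.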
/// Determines the optimal series of memory ops to replace the memset / memcpy.
/// Returns true if the number of memory ops is below the threshold (Limit),
/// and returns the sequence of memory op types to use for the
/// memset / memcpy in MemOps, by reference.
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset,
bool ZeroMemset,
bool MemcpyStrSrc,
bool AllowOverlap,
unsigned DstAS, unsigned SrcAS,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
"Expecting memcpy / memset source to meet alignment requirement!");
// If 'SrcAlign' is zero, that means the memory operation does not need to
// load the value, i.e. memset or memcpy from constant string. Otherwise,
// it's the inferred alignment of the source. 'DstAlign', on the other hand,
// is the specified alignment of the memory operation. If it is zero, that
// means it's possible to change the alignment of the destination.
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
IsMemset, ZeroMemset, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here, as SrcAlign is always greater than
// or equal to DstAlign (or zero).
VT = MVT::i64;
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
!TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
unsigned NumMemOps = 0;
while (Size != 0) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector loads / stores for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
TLI.isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
TLI.isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing an unaligned and overlapping load / store (or a pair of them).
// FIXME: Only do this for 64 bits or more, since we don't have a proper
// cost model for unaligned loads / stores.
bool Fast;
if (NumMemOps && AllowOverlap &&
VTSize >= 8 && NewVTSize < Size &&
TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
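// Illustrative example (assuming i64 is legal and the overlap path above is
// not taken): a 15-byte copy yields MemOps == { i64, i32, i16, i8 } -- the
// greedy loop repeatedly peels off the largest type that still fits.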
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
return MF.getFunction()->optForMinSize();
return MF.getFunction()->optForSize();
}
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
if (Src.isUndef())
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than a potentially humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroConstant ? 0 : SrcAlign),
false, false, CopyFromConstant, true,
DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(),
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
for (unsigned i = 0; i != NumMemOps; ++i) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
if (VTSize > Size) {
// Issuing an unaligned load / store pair that overlaps with the previous
// pair. Adjust the offset accordingly.
assert(i == NumMemOps-1 && i != 0);
SrcOff -= VTSize - Size;
DstOff -= VTSize - Size;
}
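// E.g. (illustrative, assuming fast misaligned accesses) a 15-byte copy
// lowered as two i64 ops shifts the second op back to offset 7, so bytes
// 7..14 are covered by an overlapping load / store pair.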
if (CopyFromConstant &&
(isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
// instruction. It would require a load from a constant pool first.
// We only handle zero vectors here.
// FIXME: Handle other cases where a store of a vector immediate can be
// done in a single instruction.
ConstantDataArraySlice SubSlice;
if (SrcOff < Slice.Length) {
SubSlice = Slice;
SubSlice.move(SrcOff);
} else {
// This is an out-of-bounds access and hence UB. Pretend we read zero.
SubSlice.Array = nullptr;
SubSlice.Offset = 0;
SubSlice.Length = VTSize;
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode())
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
MMOFlags);
}
if (!Store.getNode()) {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME: does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
}
OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
if (Src.isUndef())
return Chain;
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align), SrcAlign,
false, false, false, false,
DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(),
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
uint64_t SrcOff = 0, DstOff = 0;
SmallVector<SDValue, 8> LoadValues;
SmallVector<SDValue, 8> LoadChains;
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
OutChains.clear();
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
Store = DAG.getStore(Chain, dl, LoadValues[i],
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
/// \brief Lower a call to the 'memset' intrinsic into a series of store
/// operations.
///
/// \param DAG Selection DAG where lowered code is placed.
/// \param dl Link to corresponding IR location.
/// \param Chain Control flow dependency.
/// \param Dst Pointer to destination memory location.
/// \param Src Value of byte to write into the memory.
/// \param Size Number of bytes to write.
/// \param Align Alignment of the destination in bytes.
/// \param isVol True if destination is volatile.
/// \param DstPtrInfo IR information on the memory pointer.
/// \returns New head in the control flow, if lowering was successful, empty
/// SDValue otherwise.
///
/// The function tries to replace the 'llvm.memset' intrinsic with several
/// store operations and value calculation code. This is usually profitable
/// for small memory sizes.
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
if (Src.isUndef())
return Chain;
// Expand memset to a series of load/store ops if the size operand
// falls below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
true, IsZeroVal, false, true,
DstPtrInfo.getAddrSpace(), ~0u,
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
// Find the largest store and generate the bit pattern for it.
EVT LargestVT = MemOps[0];
for (unsigned i = 1; i < NumMemOps; i++)
if (MemOps[i].bitsGT(LargestVT))
LargestVT = MemOps[i];
SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
if (VTSize > Size) {
// Issuing an unaligned load / store pair that overlaps with the previous
// pair. Adjust the offset accordingly.
assert(i == NumMemOps-1 && i != 0);
DstOff -= VTSize - Size;
}
// If this store is smaller than the largest store see whether we can get
// the smaller value for free with a truncate.
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
unsigned AS) {
// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
// pointer operands can be losslessly bitcasted to pointers of address space 0
if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
report_fatal_error("cannot lower memory intrinsic in address space " +
Twine(AS));
}
}
SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memcpy with size zero? Just return the original chain.
if (ConstantSize->isNullValue())
return Chain;
SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(),Align,
isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemcpy(
*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
// If we really need inline code and the target declined to provide it,
// use a (potentially long) sequence of loads and stores.
if (AlwaysInline) {
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
true, DstPtrInfo, SrcPtrInfo);
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
// memcpy is not guaranteed to be safe. libc memcpys aren't required to
// respect volatile, so they may do things like read or write memory
// beyond the given memory regions. But fixing this isn't easy, and most
// people don't care.
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memmove with size zero? Just return the original chain.
if (ConstantSize->isNullValue())
return Chain;
SDValue Result =
getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemmove(
*this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memset with size zero? Just return the original chain.
if (ConstantSize->isNullValue())
return Chain;
SDValue Result =
getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
}
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemset(
*this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
Args.push_back(Entry);
Entry.Node = Src;
Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
Args.push_back(Entry);
Entry.Node = Size;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
VTList, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getAtomicCmpSwap(
unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment, AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
}
SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp,
MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
const Value *PtrVal, unsigned Alignment,
AtomicOrdering Ordering,
SyncScope::ID SSID) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
// An atomic store does not load. An atomic load does not store.
// (An atomicrmw obviously both loads and stores.)
// For now, atomics are considered to be volatile always, and they are
// chained as such.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
auto Flags = MachineMemOperand::MOVolatile;
if (Opcode != ISD::ATOMIC_STORE)
Flags |= MachineMemOperand::MOLoad;
if (Opcode != ISD::ATOMIC_LOAD)
Flags |= MachineMemOperand::MOStore;
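// E.g. an ATOMIC_LOAD gets MOVolatile | MOLoad, an ATOMIC_STORE gets
// MOVolatile | MOStore, and an atomicrmw-style op gets all three flags.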
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment, AAMDNodes(),
nullptr, SSID, Ordering);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
Opcode == ISD::ATOMIC_LOAD_OR ||
Opcode == ISD::ATOMIC_LOAD_XOR ||
Opcode == ISD::ATOMIC_LOAD_NAND ||
Opcode == ISD::ATOMIC_LOAD_MIN ||
Opcode == ISD::ATOMIC_LOAD_MAX ||
Opcode == ISD::ATOMIC_LOAD_UMIN ||
Opcode == ISD::ATOMIC_LOAD_UMAX ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
EVT VT = Val.getValueType();
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Val};
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
EVT VT, SDValue Chain, SDValue Ptr,
MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr};
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
if (Ops.size() == 1)
return Ops[0];
SmallVector<EVT, 4> VTs;
VTs.reserve(Ops.size());
for (unsigned i = 0; i < Ops.size(); ++i)
VTs.push_back(Ops[i].getValueType());
return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
}
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
bool ReadMem, bool WriteMem, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
auto Flags = MachineMemOperand::MONone;
if (WriteMem)
Flags |= MachineMemOperand::MOStore;
if (ReadMem)
Flags |= MachineMemOperand::MOLoad;
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
if (!Size)
Size = MemVT.getStoreSize();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
SDVTList VTList,
ArrayRef<SDValue> Ops, EVT MemVT,
MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
Opcode == ISD::LIFETIME_START ||
Opcode == ISD::LIFETIME_END ||
((int)Opcode <= std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
VTList, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
VTList, MemVT, MMO);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
FI->getIndex(), Offset);
// If this is (FI+Offset1)+Offset2, we can model it.
if (Ptr.getOpcode() != ISD::ADD ||
!isa<ConstantSDNode>(Ptr.getOperand(1)) ||
!isa<FrameIndexSDNode>(Ptr.getOperand(0)))
return MachinePointerInfo();
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
return MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI,
Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
}
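// Illustrative example: for a pointer of the form (add FrameIndex<1>, 8),
// the overload above returns fixed-stack pointer info for FI#1 at offset 8.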
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.isUndef())
return InferPointerInfo(DAG, Ptr);
return MachinePointerInfo();
}
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
if (VT == MemVT) {
ExtType = ISD::NON_EXTLOAD;
} else if (ExtType == ISD::NON_EXTLOAD) {
assert(VT == MemVT && "Non-extending load from different memory type!");
} else {
// Extending load.
assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
"Should only be an extending load, not truncating!");
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
assert(VT.isVector() == MemVT.isVector() &&
"Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
"Cannot use an ext load to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
SDVTList VTs = Indexed ?
getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Offset };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
ExtType, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
}
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, MachineMemOperand *MMO) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
VT, MMO);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
EVT VT, SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
MemVT, Alignment, MMOFlags, AAInfo);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
MachineMemOperand *MMO) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
MemVT, MMO);
}
SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
SDValue Base, SDValue Offset,
ISD::MemIndexedMode AM) {
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
assert(LD->getOffset().isUndef() && "Load is already a indexed load!");
// Don't propagate the invariant or dereferenceable flags.
auto MMOFlags =
LD->getMemOperand()->getFlags() &
~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
LD->getAAInfo());
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
ISD::UNINDEXED, false, VT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
EVT SVT, unsigned Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, EVT SVT,
MachineMemOperand *MMO) {
EVT VT = Val.getValueType();
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (VT == SVT)
return getStore(Chain, dl, Val, Ptr, MMO);
assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
"Should only be a truncating store, not extending!");
assert(VT.isInteger() == SVT.isInteger() &&
"Can't do FP-INT conversion!");
assert(VT.isVector() == SVT.isVector() &&
"Cannot use trunc store to convert to or from a vector!");
assert((!VT.isVector() ||
VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
"Cannot use trunc store to change the number of vector elements!");
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
ISD::UNINDEXED, true, SVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
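// NOTE (illustrative sketch, not part of the upstream source): a truncating
// store narrows the value on its way to memory, e.g. for an i32 Val32:
//   SDValue St = DAG.getTruncStore(Chain, DL, Val32, Ptr,
//                                  MachinePointerInfo(), MVT::i8);
// stores only the low 8 bits; the asserts above guarantee the memory type is
// strictly narrower than the value type.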
SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
SDValue Base, SDValue Offset,
ISD::MemIndexedMode AM) {
StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
ST->isTruncatingStore(), ST->getMemoryVT(),
ST->getMemOperand());
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
ExtTy, isExpanding, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue Val, SDValue Ptr, SDValue Mask,
EVT MemVT, MachineMemOperand *MMO,
bool IsTruncating, bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Val };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
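// NOTE (illustrative sketch, not part of the upstream source): the operand
// order built above is { Chain, Ptr, Mask, Val }, and a caller would write
// something like:
//   SDValue MSt = DAG.getMaskedStore(Chain, DL, Val, Ptr, Mask,
//                                    Val.getValueType(), MMO,
//                                    /*IsTruncating=*/false,
//                                    /*IsCompressing=*/false);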
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
assert(Ops.size() == 5 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, VT, MMO);
createOperands(N, Ops);
assert(N->getValue().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between mask and data");
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
assert(Ops.size() == 5 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, VT, MMO);
createOperands(N, Ops);
assert(N->getMask().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between mask and data");
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDUse> Ops) {
switch (Ops.size()) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
// Copy from an SDUse array into an SDValue array for use with
// the regular getNode logic.
SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
return getNode(Opcode, DL, VT, NewOps);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
}
switch (Opcode) {
default: break;
case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
case ISD::SELECT_CC:
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
"LHS and RHS of condition must have same type!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"True and False arms of SelectCC must have same type!");
assert(Ops[2].getValueType() == VT &&
"select_cc node must be of same type as true and false value!");
break;
case ISD::BR_CC:
assert(NumOps == 5 && "BR_CC takes 5 operands!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
}
// Memoize nodes.
SDNode *N;
SDVTList VTs = getVTList(VT);
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
return getNode(Opcode, DL, getVTList(ResultTys), Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
#if 0
switch (Opcode) {
// FIXME: figure out how to safely handle things like
// int foo(int x) { return 1 << (x & 255); }
// int bar() { return foo(256); }
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
case ISD::SHL_PARTS:
if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
else if (N3.getOpcode() == ISD::AND)
if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
// If the and is only masking out bits that cannot affect the shift,
// eliminate the and.
unsigned NumBits = VT.getScalarSizeInBits()*2;
if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
break;
}
#endif
// Memoize the node unless it returns a flag.
SDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTList) {
return getNode(Opcode, DL, VTList, None);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1) {
SDValue Ops[] = { N1 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2) {
SDValue Ops[] = { N1, N2 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3) {
SDValue Ops[] = { N1, N2, N3 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3, SDValue N4,
SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VTList, Ops);
}
SDVTList SelectionDAG::getVTList(EVT VT) {
return makeVTList(SDNode::getValueTypeList(VT), 1);
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
FoldingSetNodeID ID;
ID.AddInteger(2U);
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(2);
Array[0] = VT1;
Array[1] = VT2;
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
FoldingSetNodeID ID;
ID.AddInteger(3U);
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
ID.AddInteger(VT3.getRawBits());
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(3);
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
FoldingSetNodeID ID;
ID.AddInteger(4U);
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
ID.AddInteger(VT3.getRawBits());
ID.AddInteger(VT4.getRawBits());
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(4);
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
Array[3] = VT4;
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
unsigned NumVTs = VTs.size();
FoldingSetNodeID ID;
ID.AddInteger(NumVTs);
for (unsigned index = 0; index < NumVTs; index++) {
ID.AddInteger(VTs[index].getRawBits());
}
void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(NumVTs);
std::copy(VTs.begin(), VTs.end(), Array);
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
VTListMap.InsertNode(Result, IP);
}
return Result->getSDVTList();
}
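// NOTE (illustrative sketch, not part of the upstream source): every
// getVTList overload interns its array in VTListMap, so equal requests
// return pointer-identical storage:
//   SDVTList A = DAG.getVTList(MVT::i32, MVT::Other);
//   SDVTList B = DAG.getVTList(MVT::i32, MVT::Other);
//   assert(A.VTs == B.VTs && "VT lists are uniqued");
// This is what allows node CSE to compare value-type lists by pointer.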
/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node; instead it returns the node that
/// already exists. If the resultant node does not exist in the DAG, the
/// input node is returned. As a degenerate case, if you specify the same
/// input operands as the node already has, the input node is returned.
SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
// Check to see if there is no change.
if (Op == N->getOperand(0)) return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
InsertPos = nullptr;
// Now we update the operands.
N->OperandList[0].set(Op);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
}
SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
// Check to see if there is no change.
if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
return N; // No operands changed, just return the input node.
// See if the modified node already exists.
void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
InsertPos = nullptr;
// Now we update the operands.
if (N->OperandList[0] != Op1)
N->OperandList[0].set(Op1);
if (N->OperandList[1] != Op2)
N->OperandList[1].set(Op2);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
SDValue Ops[] = { Op1, Op2, Op3 };
return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4) {
SDValue Ops[] = { Op1, Op2, Op3, Op4 };
return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4, SDValue Op5) {
SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
unsigned NumOps = Ops.size();
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
// If no operands changed just return the input node.
if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
InsertPos = nullptr;
// Now we update the operands.
for (unsigned i = 0; i != NumOps; ++i)
if (N->OperandList[i] != Ops[i])
N->OperandList[i].set(Ops[i]);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
}
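// NOTE (illustrative sketch, not part of the upstream source): because the
// node is mutated in place, callers must continue with the returned pointer,
// which differs from the input when an equivalent node already exists:
//   SDNode *Res = DAG.UpdateNodeOperands(N, NewOp0, NewOp1);
//   if (Res != N) {
//     // N was CSE'd to an existing node; use Res from here on.
//   }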
/// DropOperands - Release the operands and set this node to have
/// zero operands.
void SDNode::DropOperands() {
// Unlike the code in MorphNodeTo that does this, we don't need to
// watch for dead nodes here.
for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
SDUse &Use = *I++;
Use.set(SDValue());
}
}
/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
/// machine opcode.
///
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT) {
SDVTList VTs = getVTList(VT);
return SelectNodeTo(N, MachineOpc, VTs, None);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
return SelectNodeTo(N, MachineOpc, VTs, None);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2, EVT VT3,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
// Reset the NodeID to -1.
New->setNodeId(-1);
if (New != N) {
ReplaceAllUsesWith(N, New);
RemoveDeadNode(N);
}
return New;
}
/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
/// the line number information on the merged node, since it is not possible
/// to preserve the information that the operation is associated with multiple
/// lines. This makes the debugger work better at -O0, where there is a higher
/// probability of other instructions being associated with that line.
///
/// For IROrder, we keep the smaller of the two.
SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
N->setIROrder(Order);
return N;
}
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
/// Note that MorphNodeTo returns the resultant node. If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one. Note that the SDLoc need not be the same.
///
/// Using MorphNodeTo is faster than creating a new node and swapping it in
/// with ReplaceAllUsesWith both because it often avoids allocating a new
/// node, and because it doesn't require CSE recalculation for any of
/// the node's users.
///
/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
/// As a consequence it isn't appropriate to use from within the DAG combiner or
/// the legalizer which maintain worklists that would need to be updated when
/// deleting things.
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
// If an identical node already exists, use it.
void *IP = nullptr;
if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops);
if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
}
if (!RemoveNodeFromCSEMaps(N))
IP = nullptr;
// Start the morphing.
N->NodeType = Opc;
N->ValueList = VTs.VTs;
N->NumValues = VTs.NumVTs;
// Clear the operands list, updating used nodes to remove this from their
// use list. Keep track of any operands that become dead as a result.
SmallPtrSet<SDNode*, 16> DeadNodeSet;
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
SDUse &Use = *I++;
SDNode *Used = Use.getNode();
Use.set(SDValue());
if (Used->use_empty())
DeadNodeSet.insert(Used);
}
// For MachineNode, initialize the memory references information.
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
MN->setMemRefs(nullptr, nullptr);
// Swap for an appropriately sized array from the recycler.
removeOperands(N);
createOperands(N, Ops);
// Delete any nodes that are still dead after adding the uses for the
// new operands.
if (!DeadNodeSet.empty()) {
SmallVector<SDNode *, 16> DeadNodes;
for (SDNode *N : DeadNodeSet)
if (N->use_empty())
DeadNodes.push_back(N);
RemoveDeadNodes(DeadNodes);
}
if (IP)
CSEMap.InsertNode(N, IP); // Memoize the new node.
return N;
}
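// NOTE (illustrative sketch, not part of the upstream source): instruction
// selectors normally reach MorphNodeTo through the SelectNodeTo wrappers
// above, e.g.
//   SDNode *New = CurDAG->SelectNodeTo(N, MachineOpc, MVT::i32, Op0, Op1);
// which morphs N into a machine node in place and CSEs it when possible.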
SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
case ISD::STRICT_FNEARBYINT:
NewOpc = ISD::FNEARBYINT;
IsUnary = true;
break;
}
// We're taking this node out of the chain, so we need to re-link things.
SDValue InputChain = Node->getOperand(0);
SDValue OutputChain = SDValue(Node, 1);
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
// updates the node in place to have the requested operands.
if (Res == Node) {
// If we updated the node in place, reset the node ID. To the isel,
// this should be just like a newly allocated machine node.
Res->setNodeId(-1);
} else {
ReplaceAllUsesWith(Node, Res);
RemoveDeadNode(Node);
}
return Res;
}
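// NOTE (illustrative sketch, not part of the upstream source): conceptually
// this rewrites, for example,
//   t1: f64,ch = strict_fadd Chain, X, Y
// into
//   t1: f64 = fadd X, Y
// after splicing the node out of the chain by redirecting every user of the
// old chain result to the incoming chain.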
/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
///
/// Note that getMachineNode returns the resultant node. If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one.
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT) {
SDVTList VTs = getVTList(VT);
return getMachineNode(Opcode, dl, VTs, None);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, SDValue Op1,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, EVT VT3,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, EVT VT3,
SDValue Op1, SDValue Op2,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, EVT VT3,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(ResultTys);
return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTs,
ArrayRef<SDValue> Ops) {
bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
void *IP = nullptr;
if (DoCSE) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops);
IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
}
}
// Allocate a new MachineSDNode.
N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
createOperands(N, Ops);
if (DoCSE)
CSEMap.InsertNode(N, IP);
InsertNode(N);
return N;
}
/// getTargetExtractSubreg - A convenience function for creating
/// TargetOpcode::EXTRACT_SUBREG nodes.
SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
SDValue Operand) {
SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
VT, Operand, SRIdxVal);
return SDValue(Subreg, 0);
}
/// getTargetInsertSubreg - A convenience function for creating
/// TargetOpcode::INSERT_SUBREG nodes.
SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
SDValue Operand, SDValue Subreg) {
SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
VT, Operand, Subreg, SRIdxVal);
return SDValue(Result, 0);
}
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags) {
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
E->intersectFlagsWith(Flags);
return E;
}
}
return nullptr;
}
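// NOTE (illustrative sketch, not part of the upstream source): DAG combines
// can use this to probe for an existing node without creating one as a side
// effect:
//   SDValue Ops[] = { N0, N1 };
//   if (SDNode *Existing =
//           DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(VT), Ops)) {
//     // Reuse Existing instead of building a new SUB node.
//   }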
/// getDbgValue - Creates an SDDbgValue node.
///
/// SDNode
SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
unsigned R, bool IsIndirect, uint64_t Off,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
/// Constant
SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
const Value *C, uint64_t Off,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
}
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
unsigned FI, uint64_t Off,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
namespace {
/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
/// pointed to by a use iterator is deleted, increment the use iterator
/// so that it doesn't dangle.
///
class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
SDNode::use_iterator &UI;
SDNode::use_iterator &UE;
void NodeDeleted(SDNode *N, SDNode *E) override {
// Increment the iterator as needed.
while (UI != UE && N == *UI)
++UI;
}
public:
RAUWUpdateListener(SelectionDAG &d,
SDNode::use_iterator &ui,
SDNode::use_iterator &ue)
: SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
} // end anonymous namespace
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version assumes From has a single result value.
///
void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDNode *From = FromN.getNode();
assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
"Cannot replace with this method!");
assert(From != To.getNode() && "Cannot replace uses of with self");
// Preserve Debug Values
TransferDbgValues(FromN, To);
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
// This specifically avoids visiting uses of From that arise while the
// replacement is happening, because any such uses would be the result
// of CSE: If an existing node looks like From after one of its operands
// is replaced by To, we don't want to replace all of its users with To
// as well. See PR3018 for more info.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
++UI;
Use.set(To);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
setRoot(To);
}
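// NOTE (illustrative sketch, not part of the upstream source): a typical
// caller replaces one value and relies on the machinery above for cleanup:
//   DAG.ReplaceAllUsesWith(SDValue(N, 0), NewVal);
// Afterwards N may be dead, and any users that became identical to existing
// nodes have been merged by AddModifiedNodeToCSEMaps.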
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version assumes that for each value of From, there is a
/// corresponding value in To in the same position with the same type.
///
void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
#ifndef NDEBUG
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
assert((!From->hasAnyUseOfValue(i) ||
From->getValueType(i) == To->getValueType(i)) &&
"Cannot use this version of ReplaceAllUsesWith!");
#endif
// Handle the trivial case.
if (From == To)
return;
// Preserve Debug Info. Only do this if there's a use.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
if (From->hasAnyUseOfValue(i)) {
assert((i < To->getNumValues()) && "Invalid To location");
TransferDbgValues(SDValue(From, i), SDValue(To, i));
}
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
++UI;
Use.setNode(To);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (From == getRoot().getNode())
setRoot(SDValue(To, getRoot().getResNo()));
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
///
/// This version can replace From with any result values. To must match the
/// number and types of values returned by From.
void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
TransferDbgValues(SDValue(From, i), *To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (From == getRoot().getNode())
setRoot(SDValue(To[getRoot().getResNo()]));
}
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone.
void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To) {
// Handle the really simple, really trivial case efficiently.
if (From == To) return;
// Handle the simple, trivial case efficiently.
if (From.getNode()->getNumValues() == 1) {
ReplaceAllUsesWith(From, To);
return;
}
// Preserve Debug Info.
TransferDbgValues(From, To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From.getNode()->use_begin(),
UE = From.getNode()->use_end();
RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
bool UserRemovedFromCSEMaps = false;
// A user can appear in a use list multiple times, and when this
// happens the uses are usually next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
SDUse &Use = UI.getUse();
// Skip uses of different values from the same node.
if (Use.getResNo() != From.getResNo()) {
++UI;
continue;
}
// If this node hasn't been modified yet, it's still in the CSE maps,
// so remove its old self from the CSE maps.
if (!UserRemovedFromCSEMaps) {
RemoveNodeFromCSEMaps(User);
UserRemovedFromCSEMaps = true;
}
++UI;
Use.set(To);
} while (UI != UE && *UI == User);
// We are iterating over all uses of the From node, so if a use
// doesn't use the specific value, no changes are made.
if (!UserRemovedFromCSEMaps)
continue;
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
if (From == getRoot())
setRoot(To);
}
namespace {
/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
/// to record information about a use.
struct UseMemo {
SDNode *User;
unsigned Index;
SDUse *Use;
};
/// operator< - Sort Memos by User.
bool operator<(const UseMemo &L, const UseMemo &R) {
return (intptr_t)L.User < (intptr_t)R.User;
}
} // end anonymous namespace
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list.
void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
const SDValue *To,
unsigned Num) {
// Handle the simple, trivial case efficiently.
if (Num == 1)
return ReplaceAllUsesOfValueWith(*From, *To);
TransferDbgValues(*From, *To);
// Read all the uses up front and make a record of them. This helps
// with processing new uses that are introduced during the
// replacement process.
SmallVector<UseMemo, 4> Uses;
for (unsigned i = 0; i != Num; ++i) {
unsigned FromResNo = From[i].getResNo();
SDNode *FromNode = From[i].getNode();
for (SDNode::use_iterator UI = FromNode->use_begin(),
E = FromNode->use_end(); UI != E; ++UI) {
SDUse &Use = UI.getUse();
if (Use.getResNo() == FromResNo) {
UseMemo Memo = { *UI, i, &Use };
Uses.push_back(Memo);
}
}
}
// Sort the uses, so that all the uses from a given User are together.
std::sort(Uses.begin(), Uses.end());
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
// We know that this user uses some value of From. If it is the right
// value, update it.
SDNode *User = Uses[UseIndex].User;
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
// The Uses array is sorted, so all the uses for a given User
// are next to each other in the list.
// To help reduce the number of CSE recomputations, process all
// the uses of this user that we can find this way.
do {
unsigned i = Uses[UseIndex].Index;
SDUse &Use = *Uses[UseIndex].Use;
++UseIndex;
Use.set(To[i]);
} while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
}
}
/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
/// based on their topological order. It returns the number of nodes, which
/// is one greater than the largest id assigned.
unsigned SelectionDAG::AssignTopologicalOrder() {
unsigned DAGSize = 0;
// SortedPos tracks the progress of the algorithm. Nodes before it are
// sorted, nodes after it are unsorted. When the algorithm completes
// it is at the end of the list.
allnodes_iterator SortedPos = allnodes_begin();
// Visit all the nodes. Move nodes with no operands to the front of
// the list immediately. Annotate nodes that do have operands with their
// operand count. Before we do this, the Node Id fields of the nodes
// may contain arbitrary values. After, the Node Id fields for nodes
// before SortedPos will contain the topological sort index, and the
// Node Id fields for nodes at SortedPos and after will contain the
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
SDNode *N = &*I++;
checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no operands; add it to the result array immediately.
N->setNodeId(DAGSize++);
allnodes_iterator Q(N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Temporarily use the Node Id as scratch space for the degree count.
N->setNodeId(Degree);
}
}
// Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
for (SDNode &Node : allnodes()) {
SDNode *N = &Node;
checkForCycles(N, this);
// N is in sorted position, so all its uses have one fewer operand
// that needs to be sorted.
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDNode *P = *UI;
unsigned Degree = P->getNodeId();
assert(Degree != 0 && "Invalid node degree");
--Degree;
if (Degree == 0) {
// All of P's operands are sorted, so P may be sorted now.
P->setNodeId(DAGSize++);
if (P->getIterator() != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Update P's outstanding operand count.
P->setNodeId(Degree);
}
}
if (Node.getIterator() == SortedPos) {
#ifndef NDEBUG
allnodes_iterator I(N);
SDNode *S = &*++I;
dbgs() << "Overran sorted position:\n";
S->dumprFull(this); dbgs() << "\n";
dbgs() << "Checking if this is due to cycles\n";
checkForCycles(this, true);
#endif
llvm_unreachable(nullptr);
}
}
assert(SortedPos == AllNodes.end() &&
"Topological sort incomplete!");
assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
"First node in topological sort is not the entry token!");
assert(AllNodes.front().getNodeId() == 0 &&
"First node in topological sort has non-zero id!");
assert(AllNodes.front().getNumOperands() == 0 &&
"First node in topological sort has operands!");
assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
"Last node in topologic sort has unexpected id!");
assert(AllNodes.back().use_empty() &&
"Last node in topologic sort has users!");
assert(DAGSize == allnodes_size() && "Node count mismatch!");
return DAGSize;
}
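// NOTE (illustrative sketch, not part of the upstream source): the routine
// above is Kahn's topological sort, with the NodeId field doubling as the
// outstanding-operand counter. For the tiny DAG
//   EntryToken -> A -> B
// EntryToken has no operands and is numbered 0 immediately; visiting it
// drops A's count to 0 so A receives id 1, and B then receives id 2.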
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
if (SD) {
assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
SD->setHasDebugValue(true);
}
DbgInfo->add(DB, SD, isParameter);
}
/// TransferDbgValues - Transfer SDDbgValues. Called when replacing nodes.
void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
if (From == To || !From.getNode()->getHasDebugValue())
return;
SDNode *FromNode = From.getNode();
SDNode *ToNode = To.getNode();
ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
SmallVector<SDDbgValue *, 2> ClonedDVs;
for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
I != E; ++I) {
SDDbgValue *Dbg = *I;
// Only add DbgValues attached to the same ResNo.
if (Dbg->getKind() == SDDbgValue::SDNODE &&
Dbg->getSDNode() == From.getNode() &&
Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
assert(FromNode != ToNode &&
"Should not transfer Debug Values intranode");
SDDbgValue *Clone =
getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
Dbg->setIsInvalidated();
}
}
for (SDDbgValue *I : ClonedDVs)
AddDbgValue(I, ToNode, false);
}
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
// The new memory operation must have the same position as the old load in
// terms of memory dependency. Create a TokenFactor for the old load and new
// memory operation and update uses of the old load's output chain to use that
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
if (!OldLoad->hasAnyUseOfValue(1))
return NewChain;
SDValue TokenFactor =
getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
return TokenFactor;
}
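// NOTE (illustrative sketch, not part of the upstream source): if users
// previously ordered themselves after (OldLoad:1), the code above reroutes
// them to
//   TokenFactor(OldLoad:1, NewMemOp:1)
// so every consumer now orders after both the old load and the new memory
// operation, preserving memory dependencies.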
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
bool llvm::isNullConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isNullValue();
}
bool llvm::isNullFPConstant(SDValue V) {
ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
return Const != nullptr && Const->isZero() && !Const->isNegative();
}
bool llvm::isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isAllOnesValue();
}
bool llvm::isOneConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
return Const != nullptr && Const->isOne();
}
bool llvm::isBitwiseNot(SDValue V) {
return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
}
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
// FIXME: We blindly ignore splats which include undef which is overly
// pessimistic.
if (CN && UndefElements.none() &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
return nullptr;
}
ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
if (CN && UndefElements.none())
return CN;
}
return nullptr;
}
HandleSDNode::~HandleSDNode() {
DropOperands();
}
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
const DebugLoc &DL,
const GlobalValue *GA, EVT VT,
int64_t o, unsigned char TF)
: SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
EVT VT, unsigned SrcAS,
unsigned DestAS)
: SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
MemSDNodeBits.IsVolatile = MMO->isVolatile();
MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
MemSDNodeBits.IsInvariant = MMO->isInvariant();
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
/// Profile - Gather unique data for the node.
///
void SDNode::Profile(FoldingSetNodeID &ID) const {
AddNodeIDNode(ID, this);
}
namespace {
struct EVTArray {
std::vector<EVT> VTs;
EVTArray() {
VTs.reserve(MVT::LAST_VALUETYPE);
for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
} // end anonymous namespace
static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
static ManagedStatic<EVTArray> SimpleVTArray;
static ManagedStatic<sys::SmartMutex<true>> VTMutex;
/// getValueTypeList - Return a pointer to the specified value type.
///
const EVT *SDNode::getValueTypeList(EVT VT) {
if (VT.isExtended()) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
}
/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
/// indicated value. This method ignores uses of other values defined by this
/// operation.
bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
assert(Value < getNumValues() && "Bad value!");
// TODO: Only iterate over uses of a given value of the node
for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
if (UI.getUse().getResNo() == Value) {
if (NUses == 0)
return false;
--NUses;
}
}
// Found exactly the right number of uses?
return NUses == 0;
}
/// hasAnyUseOfValue - Return true if there are any uses of the indicated
/// value. This method ignores uses of other values defined by this operation.
bool SDNode::hasAnyUseOfValue(unsigned Value) const {
assert(Value < getNumValues() && "Bad value!");
for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
if (UI.getUse().getResNo() == Value)
return true;
return false;
}
/// isOnlyUserOf - Return true if this node is the only use of N.
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
if (User == this)
Seen = true;
else
return false;
}
return Seen;
}
/// Return true if the only users of N are contained in Nodes.
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
if (llvm::any_of(Nodes,
[&User](const SDNode *Node) { return User == Node; }))
Seen = true;
else
return false;
}
return Seen;
}
/// isOperandOf - Return true if this value is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (*this == Op)
return true;
return false;
}
bool SDNode::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (this == Op.getNode())
return true;
return false;
}
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
/// side-effecting instructions on any chain path. In practice, this looks
/// through token factors and non-volatile loads. In order to remain efficient,
/// this only looks a couple of nodes in, it does not do an exhaustive search.
///
/// Note that we only need to examine chains when we're searching for
/// side-effects; SelectionDAG requires that all side-effects are represented
/// by chains, even if another operand would force a specific ordering. This
/// constraint is necessary to allow transformations like splitting loads.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth) const {
if (*this == Dest) return true;
// Don't search too deeply; we just want to be able to see through
// TokenFactors etc.
if (Depth == 0) return false;
// If this is a token factor, all inputs to the TF happen in parallel.
if (getOpcode() == ISD::TokenFactor) {
// First, try a shallow search.
if (is_contained((*this)->ops(), Dest)) {
// We found the chain we want as an operand of this TokenFactor.
// Essentially, we reach the chain without side-effects if we could
// serialize the TokenFactor into a simple chain of operations with
// Dest as the last operation. This is automatically true if the
// chain has one use: there are no other ordering constraints.
// If the chain has more than one use, we give up: some other
// use of Dest might force a side-effect between Dest and the current
// node.
if (Dest.hasOneUse())
return true;
}
// Next, try a deep search: check whether every operand of the TokenFactor
// reaches Dest.
return llvm::all_of((*this)->ops(), [=](SDValue Op) {
return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
});
}
// Loads don't have side effects, look through them.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
if (!Ld->isVolatile())
return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
}
return false;
}
bool SDNode::hasPredecessor(const SDNode *N) const {
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Worklist.push_back(this);
return hasPredecessorHelper(N, Visited, Worklist);
}
void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
this->Flags.intersectWith(Flags);
}
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
EVT VT = N->getValueType(0);
unsigned NE = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
SmallVector<SDValue, 8> Scalars;
SmallVector<SDValue, 4> Operands(N->getNumOperands());
// If ResNE is 0, fully unroll the vector op.
if (ResNE == 0)
ResNE = NE;
else if (NE > ResNE)
NE = ResNE;
unsigned i;
for (i = 0; i != NE; ++i) {
for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
SDValue Operand = N->getOperand(j);
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
EVT OperandEltVT = OperandVT.getVectorElementType();
Operands[j] =
getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
}
}
switch (N->getOpcode()) {
default: {
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
N->getFlags()));
break;
}
case ISD::VSELECT:
Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
getShiftAmountOperand(Operands[0].getValueType(),
Operands[1])));
break;
case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG: {
EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
Operands[0],
getValueType(ExtVT)));
}
}
}
for (; i < ResNE; ++i)
Scalars.push_back(getUNDEF(EltVT));
EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
return getBuildVector(VecVT, dl, Scalars);
}
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
LoadSDNode *Base,
unsigned Bytes,
int Dist) const {
if (LD->isVolatile() || Base->isVolatile())
return false;
if (LD->isIndexed() || Base->isIndexed())
return false;
if (LD->getChain() != Base->getChain())
return false;
EVT VT = LD->getValueType(0);
if (VT.getSizeInBits() / 8 != Bytes)
return false;
SDValue Loc = LD->getOperand(1);
SDValue BaseLoc = Base->getOperand(1);
auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
auto LocDecomp = BaseIndexOffset::match(Loc, *this);
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
return (Dist * Bytes == Offset);
return false;
}
/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
/// it cannot be inferred.
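/// For example, a GlobalAddress known (from its trailing zero bits) to be
/// 16-byte aligned, plus a constant offset of 8, yields
/// MinAlign(16, 8) == 8.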
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
KnownBits Known(PtrWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
// stack slot's alignment.
int FrameIdx = 1 << 31;
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
} else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
// Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
return FIInfoAlign;
}
return 0;
}
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
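/// For example, v8i32 splits into (v4i32, v4i32); for scalars, both halves
/// are the type returned by getTypeToTransformTo.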
std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
// Currently all types are split in half.
EVT LoVT, HiVT;
if (!VT.isVector())
LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
else
LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext());
return std::make_pair(LoVT, HiVT);
}
/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
/// low/high part.
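/// For example, splitting a v8i32 with LoVT == HiVT == v4i32 emits
/// EXTRACT_SUBVECTOR nodes at element indices 0 and 4.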
std::pair<SDValue, SDValue>
SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
const EVT &HiVT) {
assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
N.getValueType().getVectorNumElements() &&
"More vector elements requested than available!");
SDValue Lo, Hi;
Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
getConstant(LoVT.getVectorNumElements(), DL,
TLI->getVectorIdxTy(getDataLayout())));
return std::make_pair(Lo, Hi);
}
void SelectionDAG::ExtractVectorElements(SDValue Op,
SmallVectorImpl<SDValue> &Args,
unsigned Start, unsigned Count) {
EVT VT = Op.getValueType();
if (Count == 0)
Count = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
SDLoc SL(Op);
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Op, getConstant(i, SL, IdxTy)));
}
}
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
}
Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
}
bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
unsigned MinSplatBits,
bool IsBigEndian) const {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned VecWidth = VT.getSizeInBits();
if (MinSplatBits > VecWidth)
return false;
// FIXME: The widths are based on this node's type, but build vectors can
// truncate their operands.
SplatValue = APInt(VecWidth, 0);
SplatUndef = APInt(VecWidth, 0);
// Get the bits. Bits with undefined values (when the corresponding element
// of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
// in SplatValue. If any of the values are not constant, give up and return
// false.
unsigned NumOps = getNumOperands();
assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
unsigned EltWidth = VT.getScalarSizeInBits();
for (unsigned j = 0; j < NumOps; ++j) {
unsigned i = IsBigEndian ? NumOps - 1 - j : j;
SDValue OpVal = getOperand(i);
unsigned BitPos = j * EltWidth;
if (OpVal.isUndef())
SplatUndef.setBits(BitPos, BitPos + EltWidth);
else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
else
return false;
}
// The build_vector is all constants or undefs. Find the smallest element
// size that splats the vector.
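// For example, a v2i16 build_vector of <0x0101, 0x0101> starts with a
// 32-bit SplatValue of 0x01010101; both halving steps succeed, so (given
// MinSplatBits <= 8) SplatBitSize ends up as 8, a splat of the byte 0x01.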
HasAnyUndefs = (SplatUndef != 0);
// FIXME: This does not work for vectors with elements less than 8 bits.
while (VecWidth > 8) {
unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
APInt LowValue = SplatValue.trunc(HalfSize);
APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
APInt LowUndef = SplatUndef.trunc(HalfSize);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
MinSplatBits > HalfSize)
break;
SplatValue = HighValue | LowValue;
SplatUndef = HighUndef & LowUndef;
VecWidth = HalfSize;
}
SplatBitSize = VecWidth;
return true;
}
SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
if (UndefElements) {
UndefElements->clear();
UndefElements->resize(getNumOperands());
}
SDValue Splatted;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
SDValue Op = getOperand(i);
if (Op.isUndef()) {
if (UndefElements)
(*UndefElements)[i] = true;
} else if (!Splatted) {
Splatted = Op;
} else if (Splatted != Op) {
return SDValue();
}
}
if (!Splatted) {
assert(getOperand(0).isUndef() &&
"Can only have a splat without a constant for all undefs.");
return getOperand(0);
}
return Splatted;
}
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
}
ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
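// Return the base-2 log of a power-of-2 constant FP splat; e.g. a splat of
// 8.0 yields 3, while 6.0 or 0.5 (not an exact power-of-2 integer) yields -1.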
int32_t
BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
uint32_t BitWidth) const {
if (ConstantFPSDNode *CN =
dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
bool IsExact;
APSInt IntVal(BitWidth);
const APFloat &APF = CN->getValueAPF();
if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
APFloat::opOK ||
!IsExact)
return -1;
return IntVal.exactLogBase2();
}
return -1;
}
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
return false;
}
return true;
}
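// A splat mask repeats one element: every in-range index must equal the
// first non-undef index, e.g. {2, 2, -1, 2} is a splat of element 2 but
// {0, 1, 0, 1} is not.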
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Find the first non-undef value in the shuffle mask.
unsigned i, e;
for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
/* search */;
assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
// Make sure all remaining elements are either undef or the same as the first
// non-undef value.
for (int Idx = Mask[i]; i != e; ++i)
if (Mask[i] >= 0 && Mask[i] != Idx)
return false;
return true;
}
// \brief Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
return N.getNode();
// Treat a GlobalAddress supporting constant offset folding as a
// constant integer.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
if (GA->getOpcode() == ISD::GlobalAddress &&
TLI->isOffsetFoldingLegal(GA))
return GA;
return nullptr;
}
SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
if (isa<ConstantFPSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
return N.getNode();
return nullptr;
}
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
SmallPtrSetImpl<const SDNode*> &Checked,
const llvm::SelectionDAG *DAG) {
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N).second) {
errs() << "Detected cycle in SelectionDAG\n";
dbgs() << "Offending node:\n";
N->dumprFull(DAG); dbgs() << "\n";
abort();
}
for (const SDValue &Op : N->op_values())
checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
Checked.insert(N);
Visited.erase(N);
}
#endif
void llvm::checkForCycles(const llvm::SDNode *N,
const llvm::SelectionDAG *DAG,
bool force) {
#ifndef NDEBUG
bool check = force;
#ifdef EXPENSIVE_CHECKS
check = true;
#endif // EXPENSIVE_CHECKS
if (check) {
assert(N && "Checking nonexistent SDNode");
SmallPtrSet<const SDNode*, 32> visited;
SmallPtrSet<const SDNode*, 32> checked;
checkForCyclesHelper(N, visited, checked, DAG);
}
#endif // !NDEBUG
}
void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
checkForCycles(DAG->getRoot().getNode(), DAG, force);
}
Index: head/contrib/llvm/lib/IR/AutoUpgrade.cpp
===================================================================
--- head/contrib/llvm/lib/IR/AutoUpgrade.cpp (revision 322854)
+++ head/contrib/llvm/lib/IR/AutoUpgrade.cpp (revision 322855)
@@ -1,2332 +1,2350 @@
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
return false;
// Yes, it's old, replace it with new version.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
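// For example, an old declaration of llvm.x86.sse41.insertps taking an i32
// immediate is renamed and mapped onto the current declaration, which takes
// the immediate as an i8.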
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check that the last argument is an i32.
Type *LastArgType = F->getFunctionType()->getParamType(
F->getFunctionType()->getNumParams() - 1);
if (!LastArgType->isIntegerTy(32))
return false;
// Move this function aside and map down.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsic matches below should be marked with the LLVM version
// that started autoupgrading them. At some point in the future we would
// like to use this information to remove upgrade code for some older
// intrinsics. It is currently undecided how we will determine that future
// point.
if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
Name.startswith("avx2.pcmpeq.") || // Added in 3.1
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
Name == "sse.add.ss" || // Added in 4.0
Name == "sse2.add.sd" || // Added in 4.0
Name == "sse.sub.ss" || // Added in 4.0
Name == "sse2.sub.sd" || // Added in 4.0
Name == "sse.mul.ss" || // Added in 4.0
Name == "sse2.mul.sd" || // Added in 4.0
Name == "sse.div.ss" || // Added in 4.0
Name == "sse2.div.sd" || // Added in 4.0
Name == "sse41.pmaxsb" || // Added in 3.9
Name == "sse2.pmaxs.w" || // Added in 3.9
Name == "sse41.pmaxsd" || // Added in 3.9
Name == "sse2.pmaxu.b" || // Added in 3.9
Name == "sse41.pmaxuw" || // Added in 3.9
Name == "sse41.pmaxud" || // Added in 3.9
Name == "sse41.pminsb" || // Added in 3.9
Name == "sse2.pmins.w" || // Added in 3.9
Name == "sse41.pminsd" || // Added in 3.9
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0
Name.startswith("avx512.mask.pmin") || // Added in 4.0
Name.startswith("avx2.vbroadcast") || // Added in 3.8
Name.startswith("avx2.pbroadcast") || // Added in 3.8
Name.startswith("avx.vpermil.") || // Added in 3.1
Name.startswith("sse2.pshuf") || // Added in 3.9
Name.startswith("avx512.pbroadcast") || // Added in 3.9
Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
Name.startswith("avx512.mask.movddup") || // Added in 3.9
Name.startswith("avx512.mask.movshdup") || // Added in 3.9
Name.startswith("avx512.mask.movsldup") || // Added in 3.9
Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
Name.startswith("avx512.mask.punpckl") || // Added in 3.9
Name.startswith("avx512.mask.punpckh") || // Added in 3.9
Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
Name.startswith("avx512.mask.pand.") || // Added in 3.9
Name.startswith("avx512.mask.pandn.") || // Added in 3.9
Name.startswith("avx512.mask.por.") || // Added in 3.9
Name.startswith("avx512.mask.pxor.") || // Added in 3.9
Name.startswith("avx512.mask.and.") || // Added in 3.9
Name.startswith("avx512.mask.andn.") || // Added in 3.9
Name.startswith("avx512.mask.or.") || // Added in 3.9
Name.startswith("avx512.mask.xor.") || // Added in 3.9
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name == "avx512.mask.add.pd.128" || // Added in 4.0
Name == "avx512.mask.add.pd.256" || // Added in 4.0
Name == "avx512.mask.add.ps.128" || // Added in 4.0
Name == "avx512.mask.add.ps.256" || // Added in 4.0
Name == "avx512.mask.div.pd.128" || // Added in 4.0
Name == "avx512.mask.div.pd.256" || // Added in 4.0
Name == "avx512.mask.div.ps.128" || // Added in 4.0
Name == "avx512.mask.div.ps.256" || // Added in 4.0
Name == "avx512.mask.mul.pd.128" || // Added in 4.0
Name == "avx512.mask.mul.pd.256" || // Added in 4.0
Name == "avx512.mask.mul.ps.128" || // Added in 4.0
Name == "avx512.mask.mul.ps.256" || // Added in 4.0
Name == "avx512.mask.sub.pd.128" || // Added in 4.0
Name == "avx512.mask.sub.pd.256" || // Added in 4.0
Name == "avx512.mask.sub.ps.128" || // Added in 4.0
Name == "avx512.mask.sub.ps.256" || // Added in 4.0
Name == "avx512.mask.max.pd.128" || // Added in 5.0
Name == "avx512.mask.max.pd.256" || // Added in 5.0
Name == "avx512.mask.max.ps.128" || // Added in 5.0
Name == "avx512.mask.max.ps.256" || // Added in 5.0
Name == "avx512.mask.min.pd.128" || // Added in 5.0
Name == "avx512.mask.min.pd.256" || // Added in 5.0
Name == "avx512.mask.min.ps.128" || // Added in 5.0
Name == "avx512.mask.min.ps.256" || // Added in 5.0
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.psll.w") || // Added in 4.0
Name.startswith("avx512.mask.psra.d") || // Added in 4.0
Name.startswith("avx512.mask.psra.q") || // Added in 4.0
Name.startswith("avx512.mask.psra.w") || // Added in 4.0
Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
Name.startswith("avx512.mask.pslli") || // Added in 4.0
Name.startswith("avx512.mask.psrai") || // Added in 4.0
Name.startswith("avx512.mask.psrli") || // Added in 4.0
Name.startswith("avx512.mask.psllv") || // Added in 4.0
Name.startswith("avx512.mask.psrav") || // Added in 4.0
Name.startswith("avx512.mask.psrlv") || // Added in 4.0
Name.startswith("sse41.pmovsx") || // Added in 3.8
Name.startswith("sse41.pmovzx") || // Added in 3.9
Name.startswith("avx2.pmovsx") || // Added in 3.9
Name.startswith("avx2.pmovzx") || // Added in 3.9
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
Name == "sse2.cvtdq2pd" || // Added in 3.9
Name == "sse2.cvtps2pd" || // Added in 3.9
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
Name == "sse41.movntdqa" || // Added in 5.0
Name == "avx2.movntdqa" || // Added in 5.0
Name == "avx512.movntdqa" || // Added in 5.0
Name == "sse2.storel.dq" || // Added in 3.9
Name.startswith("sse.storeu.") || // Added in 3.9
Name.startswith("sse2.storeu.") || // Added in 3.9
Name.startswith("avx.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.store.p") || // Added in 3.9
Name.startswith("avx512.mask.store.b.") || // Added in 3.9
Name.startswith("avx512.mask.store.w.") || // Added in 3.9
Name.startswith("avx512.mask.store.d.") || // Added in 3.9
Name.startswith("avx512.mask.store.q.") || // Added in 3.9
Name.startswith("avx512.mask.loadu.") || // Added in 3.9
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.mask.palignr.") || // Added in 3.9
Name.startswith("avx512.mask.valign.") || // Added in 4.0
Name.startswith("sse2.psll.dq") || // Added in 3.7
Name.startswith("sse2.psrl.dq") || // Added in 3.7
Name.startswith("avx2.psll.dq") || // Added in 3.7
Name.startswith("avx2.psrl.dq") || // Added in 3.7
Name.startswith("avx512.psll.dq") || // Added in 3.9
Name.startswith("avx512.psrl.dq") || // Added in 3.9
Name == "sse41.pblendw" || // Added in 3.7
Name.startswith("sse41.blendp") || // Added in 3.7
Name.startswith("avx.blend.p") || // Added in 3.7
Name == "avx2.pblendw" || // Added in 3.7
Name.startswith("avx2.pblendd.") || // Added in 3.7
Name.startswith("avx.vbroadcastf128") || // Added in 4.0
Name == "avx2.vbroadcasti128" || // Added in 3.7
Name == "xop.vpcmov" || // Added in 3.8
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2))
return true;
return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {
// Only handle intrinsics that start with "x86.".
if (!Name.startswith("x86."))
return false;
// Remove "x86." prefix.
Name = Name.substr(4);
if (ShouldUpgradeX86Intrinsic(F, Name)) {
NewFn = nullptr;
return true;
}
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
if (Name.substr(11) == "nzc")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
// Several blend and other instructions with masks used the wrong number of
// bits.
if (Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
if (Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
if (Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
if (Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
if (Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
if (Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Idx = F->getFunctionType()->getParamType(2);
if (Idx->isFPOrFPVectorTy()) {
rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
if (EltSize == 64 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2pd;
else if (EltSize == 32 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2ps;
else if (EltSize == 64 && IdxSize == 256)
Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
else
Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
return true;
}
}
return false;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
// Quickly eliminate it if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 8 || !Name.startswith("llvm."))
return false;
Name = Name.substr(5); // Strip off "llvm."
switch (Name[0]) {
default: break;
case 'a': {
if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
Type::getInt1Ty(F->getContext())
};
// Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
NewFn = Function::Create(fType, F->getLinkage(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
if (Name.startswith("arm.neon.vcnt")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
F->arg_begin()->getType());
return true;
}
Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vldRegex.match(Name)) {
auto fArgs = F->getFunctionType()->params();
SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
NewFn = Function::Create(fType, F->getLinkage(),
"llvm." + Name + ".p0i8", F->getParent());
return true;
}
Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
static const Intrinsic::ID StoreLaneInts[] = {
Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
Intrinsic::arm_neon_vst4lane
};
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
if (Name.find("lane") == StringRef::npos)
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreLaneInts[fArgs.size() - 5], Tys);
return true;
}
if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
break;
}
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
break;
}
case 'i':
case 'l': {
bool IsLifetimeStart = Name.startswith("lifetime.start");
if (IsLifetimeStart || Name.startswith("invariant.start")) {
Intrinsic::ID ID = IsLifetimeStart ?
Intrinsic::lifetime_start : Intrinsic::invariant_start;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[1]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
bool IsLifetimeEnd = Name.startswith("lifetime.end");
if (IsLifetimeEnd || Name.startswith("invariant.end")) {
Intrinsic::ID ID = IsLifetimeEnd ?
Intrinsic::lifetime_end : Intrinsic::invariant_end;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
break;
}
case 'm': {
if (Name.startswith("masked.load.")) {
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_load,
Tys);
return true;
}
}
if (Name.startswith("masked.store.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = { Args[0], Args[1] };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_store,
Tys);
return true;
}
}
// Rename gather/scatter intrinsics with no address space overloading to the
// new overload that includes an address space.
if (Name.startswith("masked.gather.")) {
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_gather, Tys);
return true;
}
}
if (Name.startswith("masked.scatter.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[1]};
if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_scatter, Tys);
return true;
}
}
break;
}
case 'n': {
if (Name.startswith("nvvm.")) {
Name = Name.substr(5);
// The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
.Cases("brev32", "brev64", Intrinsic::bitreverse)
.Case("clz.i", Intrinsic::ctlz)
.Case("popc.i", Intrinsic::ctpop)
.Default(Intrinsic::not_intrinsic);
if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
{F->getReturnType()});
return true;
}
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
// TODO: We could add lohi.i2d.
bool Expand = StringSwitch<bool>(Name)
.Cases("abs.i", "abs.ll", true)
.Cases("clz.ll", "popc.ll", "h2f", true)
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
.Default(false);
if (Expand) {
NewFn = nullptr;
return true;
}
}
break;
}
case 'o':
// We only need to change the name to match the mangling including the
// address space.
if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->arg_size() == 2 ||
F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
Tys);
return true;
}
}
break;
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
return true;
}
break;
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
// Remangle our intrinsic since we upgrade the mangling
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
NewFn = Result.getValue();
return true;
}
// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the
// upgraded form of the intrinsic. We should perhaps have two separate
// functions for this.
return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
assert(F != NewFn && "Intrinsic function upgraded to the same function");
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
F = NewFn;
if (Intrinsic::ID id = F->getIntrinsicID())
F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
return Upgraded;
}
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// Nothing to do yet.
return false;
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
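// For example, a 128-bit pslldq with Shift == 4 shuffles the zero vector
// (shuffle elements 0-15) against the source (elements 16-31) with indices
// {12, 13, 14, 15, 16, ..., 27}: the low four result bytes are zero and the
// remaining twelve are the low twelve bytes of the source.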
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
Type *ResultTy = Op->getType();
unsigned NumElts = ResultTy->getVectorNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
uint32_t Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = NumElts + i - Shift;
if (Idx < NumElts)
Idx -= NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
Type *ResultTy = Op->getType();
unsigned NumElts = ResultTy->getVectorNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
uint32_t Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16)
Idx += NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
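// Convert an integer mask argument to a vector of i1: bit i of the mask
// controls element i. For example, an i8 mask of 0x05 with NumElts == 4
// becomes <i1 1, i1 0, i1 1, i1 0>; the four high bits are dropped by the
// extracting shuffle.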
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
// If we have less than 8 elements, then the starting mask was an i8 and
// we need to extract down to the right number of elements.
if (NumElts < 8) {
uint32_t Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
Mask = Builder.CreateShuffleVector(Mask, Mask,
makeArrayRef(Indices, NumElts),
"extract");
}
return Mask;
}
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just return the first operand unmasked.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
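// For example, a 128-bit palignr with ShiftVal == 4 emits
// shufflevector(Op1, Op0, {4, 5, ..., 15, 16, 17, 18, 19}), i.e. the upper
// twelve bytes of Op1 followed by the low four bytes of Op0, which is the
// concatenation Op0:Op1 shifted right by four bytes.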
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
Value *Op1, Value *Shift,
Value *Passthru, Value *Mask,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = Op0->getType()->getVectorNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
// Mask the immediate for VALIGN.
if (IsVALIGN)
ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
if (ShiftVal >= 32)
return llvm::Constant::getNullValue(Op0->getType());
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
if (ShiftVal > 16) {
ShiftVal -= 16;
Op1 = Op0;
Op0 = llvm::Constant::getNullValue(Op0->getType());
}
uint32_t Indices[64];
// 256-bit palignr operates on 128-bit lanes, so handle each 16-byte lane
// separately.
for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
Value *Align = Builder.CreateShuffleVector(Op1, Op0,
makeArrayRef(Indices, NumElts),
"palignr");
return EmitX86Select(Builder, Mask, Align, Passthru);
}
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Data->getType()));
unsigned Align =
Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
// If the mask is all ones just emit a regular store.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedStore(Data, Ptr, Align);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = Data->getType()->getVectorNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
Value *Ptr, Value *Passthru, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Passthru->getType()));
unsigned Align =
Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
// If the mask is all ones just emit a regular load.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedLoad(Ptr, Align);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = Passthru->getType()->getVectorNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
ICmpInst::Predicate Pred) {
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
if (CI.getNumArgOperands() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
}
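// Lower a masked integer compare using an AVX-512 condition code:
//   0 -> eq, 1 -> lt, 2 -> le, 3 -> always false,
//   4 -> ne, 5 -> ge, 6 -> gt, 7 -> always true,
// with Signed selecting signed vs. unsigned predicates. The i1 results are
// widened to at least eight elements before being bitcast back to an
// integer mask.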
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = Op0->getType()->getVectorNumElements();
Value *Cmp;
if (CC == 3) {
Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
} else if (CC == 7) {
Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
} else {
ICmpInst::Predicate Pred;
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case 0: Pred = ICmpInst::ICMP_EQ; break;
case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 4: Pred = ICmpInst::ICMP_NE; break;
case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
}
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
if (NumElts < 8) {
uint32_t Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Indices[i] = NumElts + i % NumElts;
Cmp = Builder.CreateShuffleVector(Cmp,
Constant::getNullValue(Cmp->getType()),
Indices);
}
return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
std::max(NumElts, 8U)));
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
Intrinsic::ID IID) {
Function *F = CI.getCalledFunction();
Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
Value *Rep = Builder.CreateCall(Intrin,
{ CI.getArgOperand(0), CI.getArgOperand(1) });
return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
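// Upgrade a masked scalar move: lane 0 of the result is B[0] when bit 0 of
// the mask is set and Src[0] otherwise; the remaining lanes come from A.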
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
Value* A = CI.getArgOperand(0);
Value* B = CI.getArgOperand(1);
Value* Src = CI.getArgOperand(2);
Value* Mask = CI.getArgOperand(3);
Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
Value* Cmp = Builder.CreateIsNotNull(AndNode);
Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
unsigned NumElts = CI.getType()->getVectorNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
assert(F && "Intrinsic call is not direct?");
if (!NewFn) {
// Get the Function's name.
StringRef Name = F->getName();
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
bool IsNVVM = Name.startswith("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Nontemporal (unaligned) store of the 0'th element of the float/double
// vector.
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
Value *Extract =
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx.movnt.") ||
Name.startswith("avx512.storent."))) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
VectorType *VTy = cast<VectorType>(Arg1->getType());
StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
VTy->getBitWidth() / 8);
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && Name == "sse2.storel.dq") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Elt->getType()),
"cast");
Builder.CreateAlignedStore(Elt, BC, 1);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("sse.storeu.") ||
Name.startswith("sse2.storeu.") ||
Name.startswith("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Arg0 = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
Builder.CreateAlignedStore(Arg1, Arg0, 1);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx512.mask.store"))) {
// "avx512.mask.storeu." or "avx512.mask.store."
bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
if (IsX86 && (Name.startswith("sse2.pcmp") ||
Name.startswith("avx2.pcmp"))) {
// "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
bool CmpEq = Name[9] == 'e';
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFAdd(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFSub(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFMul(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFDiv(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
} else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
} else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
Name.startswith("avx2.pmaxs") ||
Name.startswith("avx512.mask.pmaxs"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
Name.startswith("avx2.pmaxu") ||
Name.startswith("avx512.mask.pmaxu"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
Name.startswith("avx2.pmins") ||
Name.startswith("avx512.mask.pmins"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvt.ps2.pd.256" ||
Name.startswith("avx512.mask.cvtdq2pd.") ||
Name.startswith("avx512.mask.cvtudq2pd."))) {
// Lossless i32/float to double conversion.
// Extract the bottom elements if necessary and convert to double vector.
Value *Src = CI->getArgOperand(0);
VectorType *SrcTy = cast<VectorType>(Src->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
Rep = CI->getArgOperand(0);
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
assert(NumDstElts == 2 && "Unexpected vector size");
uint32_t ShuffleMask[2] = { 0, 1 };
Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
ShuffleMask);
}
bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
if (SInt2Double)
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
else if (UInt2Double)
Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1), CI->getArgOperand(2),
/*Aligned*/false);
} else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
} else if (IsX86 && Name.startswith("xop.vpcom")) {
Intrinsic::ID intID;
if (Name.endswith("ub"))
intID = Intrinsic::x86_xop_vpcomub;
else if (Name.endswith("uw"))
intID = Intrinsic::x86_xop_vpcomuw;
else if (Name.endswith("ud"))
intID = Intrinsic::x86_xop_vpcomud;
else if (Name.endswith("uq"))
intID = Intrinsic::x86_xop_vpcomuq;
else if (Name.endswith("b"))
intID = Intrinsic::x86_xop_vpcomb;
else if (Name.endswith("w"))
intID = Intrinsic::x86_xop_vpcomw;
else if (Name.endswith("d"))
intID = Intrinsic::x86_xop_vpcomd;
else if (Name.endswith("q"))
intID = Intrinsic::x86_xop_vpcomq;
else
llvm_unreachable("Unknown suffix");
Name = Name.substr(9); // strip off "xop.vpcom"
unsigned Imm;
if (Name.startswith("lt"))
Imm = 0;
else if (Name.startswith("le"))
Imm = 1;
else if (Name.startswith("gt"))
Imm = 2;
else if (Name.startswith("ge"))
Imm = 3;
else if (Name.startswith("eq"))
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
else if (Name.startswith("false"))
Imm = 6;
else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
} else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
unsigned EltNum = VecTy->getVectorNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
EltTy->getPointerTo());
Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
Rep = UndefValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
Name.startswith("avx2.pmovzx") ||
Name.startswith("avx512.mask.pmovsx") ||
Name.startswith("avx512.mask.pmovzx"))) {
VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;
Value *SV = Builder.CreateShuffleVector(
CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
// If there are 3 arguments, it's a masked intrinsic so we need a select.
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128")) {
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
Type *EltTy = CI->getType()->getVectorElementType();
unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateAlignedLoad(Op, 1);
if (NumSrcElts == 2)
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
{ 0, 1, 0, 1 });
else
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
{ 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
Name.startswith("avx512.mask.broadcast.s"))) {
// Replace vp?broadcasts with a vector shuffle.
Value *Op = CI->getArgOperand(0);
unsigned NumElts = CI->getType()->getVectorNumElements();
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
Constant::getNullValue(MaskTy));
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
false);
} else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
true);
} else if (IsX86 && (Name == "sse2.psll.dq" ||
Name == "avx2.psll.dq")) {
// 128/256-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psrl.dq" ||
Name == "avx2.psrl.dq")) {
// 128/256-bit shift right specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
Name == "avx2.psll.dq.bs" ||
Name == "avx512.psll.dq.512")) {
// 128/256/512-bit shift left specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
Name == "avx2.psrl.dq.bs" ||
Name == "avx512.psrl.dq.512")) {
// 128/256/512-bit shift right specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse41.pblendw" ||
Name.startswith("sse41.blendp") ||
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
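// Each immediate bit picks a source: a set bit selects the Op1 element
// (second-source indices are offset by NumElts), a clear bit keeps the
// Op0 element. The 8-bit immediate is reused for every group of 8
// elements.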
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
// Insert the second operand into the first operand.
// Note that there is no guarantee that instruction lowering will actually
// produce a vinsertf128 instruction for the created shuffles. In
// particular, the 0 immediate case involves no lane changes, so it can
// be handled as a blend.
// Example of shuffle mask for 32-bit elements:
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
// First fill with the identity mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
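// E.g. for a 512-bit source of i32 (SrcNumElts == 16) and a 128-bit
// result (DstNumElts == 4), Imm == 3 selects indices <12, 13, 14, 15>.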
// Get indexes for the subvector of the input vector.
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
Name.startswith("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<uint32_t, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
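// Each 2-bit immediate field selects an element within a 4-element
// group; e.g. Imm = 0x1B (0b00011011) reverses every group:
// <3, 2, 1, 0, 7, 6, 5, 4, ...>.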
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx.vpermil.") ||
Name == "sse2.pshuf.d" ||
Name.startswith("avx512.mask.vpermil.p") ||
Name.startswith("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
// Calculate the size of each index in the immediate.
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
unsigned IdxMask = ((1 << IdxSize) - 1);
SmallVector<uint32_t, 8> Idxs(NumElts);
// Look up the bits for this element, wrapping around the immediate every
// 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
// by the first index of each group.
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
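// E.g. for 32-bit elements (IdxSize == 2), Imm = 0x1B turns each
// 4-element group into <3, 2, 1, 0>; 64-bit elements (IdxSize == 1)
// consume one immediate bit per element.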
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
Name.startswith("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = CI->getType()->getVectorNumElements();
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
for (unsigned i = 4; i != 8; ++i)
Idxs[i + l] = i + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
Name.startswith("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = CI->getType()->getVectorNumElements();
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = i + l;
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned NumElts = CI->getType()->getVectorNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// Base index is the starting element of the lane.
Idxs[i] = i - (i % NumLaneElts);
// If we are halfway through the lane, switch to the other source.
if ((i % NumLaneElts) >= HalfLaneElts)
Idxs[i] += NumElts;
// Now select the specific element by adding HalfLaneElts bits from the
// immediate, wrapping around the immediate every 8 bits.
Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
}
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned NumElts = CI->getType()->getVectorNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
if (Name.startswith("avx512.mask.movshdup."))
Offset = 1;
SmallVector<uint32_t, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += NumLaneElts)
for (unsigned i = 0; i != NumLaneElts; i += 2) {
Idxs[i + l + 0] = i + l + Offset;
Idxs[i + l + 1] = i + l + Offset;
}
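// movsldup (Offset == 0) duplicates the even elements, <0, 0, 2, 2, ...>;
// movshdup (Offset == 1) duplicates the odd ones, <1, 1, 3, 3, ...>.
// movddup has 64-bit elements, so each 128-bit lane becomes <0, 0>.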
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
Name.startswith("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = CI->getType()->getVectorNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<uint32_t, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
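// Interleave the low halves of each 128-bit lane: even result elements
// come from Op0, odd ones from the same position in Op1.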
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
Name.startswith("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = CI->getType()->getVectorNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<uint32_t, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
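// Same interleave as punpckl, but starting at the high half of each
// 128-bit lane.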
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.por.")) {
Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.and.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
Rep = Builder.CreateAnd(Rep,
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.or.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ctlz,
CI->getType()),
{ CI->getArgOperand(0), Builder.getInt1(false) });
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
Name.startswith("avx512.mask.min.p"))) {
bool IsMin = Name[13] == 'i';
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
unsigned EltWidth = VecTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (!IsMin && VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_max_ps;
else if (!IsMin && VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_max_pd;
else if (!IsMin && VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_max_ps_256;
else if (!IsMin && VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_max_pd_256;
else if (IsMin && VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_min_ps;
else if (IsMin && VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_min_pd;
else if (IsMin && VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_min_ps_256;
else if (IsMin && VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_min_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
if (VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
Name.startswith("avx512.mask.pmulu.dq."))) {
bool IsUnsigned = Name[16] == 'u';
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse41_pmuldq;
else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pmul_dq;
else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pmul_dq_512;
else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_pmulu_dq;
else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_pmulu_dq;
else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_pmulu_dq_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pack")) {
bool IsUnsigned = Name[16] == 'u';
bool IsDW = Name[18] == 'd';
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_packsswb_128;
else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packsswb;
else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packsswb_512;
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_packssdw_128;
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packssdw;
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packssdw_512;
else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse2_packuswb_128;
else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packuswb;
else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packuswb_512;
else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
IID = Intrinsic::x86_sse41_packusdw;
else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
IID = Intrinsic::x86_avx2_packusdw;
else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
IID = Intrinsic::x86_avx512_packusdw_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
IID = Intrinsic::x86_avx2_psllv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
IID = Intrinsic::x86_avx2_psllv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
IID = Intrinsic::x86_avx2_psllv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
IID = Intrinsic::x86_avx2_psllv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
IID = Intrinsic::x86_avx512_psllv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
IID = Intrinsic::x86_avx512_psllv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
IID = Intrinsic::x86_avx512_psllv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
: Intrinsic::x86_sse2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
: Intrinsic::x86_sse2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
: Intrinsic::x86_sse2_psll_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
: Intrinsic::x86_avx2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
: Intrinsic::x86_avx2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
: Intrinsic::x86_avx2_psll_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
Intrinsic::x86_avx512_psll_d_512;
else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
Intrinsic::x86_avx512_psll_q_512;
else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
: Intrinsic::x86_avx512_psll_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
IID = Intrinsic::x86_avx2_psrlv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
IID = Intrinsic::x86_avx2_psrlv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
IID = Intrinsic::x86_avx2_psrlv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
IID = Intrinsic::x86_avx2_psrlv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
IID = Intrinsic::x86_avx512_psrlv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
IID = Intrinsic::x86_avx512_psrlv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
IID = Intrinsic::x86_avx512_psrlv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
: Intrinsic::x86_sse2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
: Intrinsic::x86_sse2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
: Intrinsic::x86_sse2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
: Intrinsic::x86_avx2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
: Intrinsic::x86_avx2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
: Intrinsic::x86_avx2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
Intrinsic::x86_avx512_psrl_d_512;
else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
Intrinsic::x86_avx512_psrl_q_512;
else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
: Intrinsic::x86_avx512_psrl_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psra")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
IID = Intrinsic::x86_avx2_psrav_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
IID = Intrinsic::x86_avx2_psrav_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
IID = Intrinsic::x86_avx512_psrav_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
IID = Intrinsic::x86_avx512_psrav_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
IID = Intrinsic::x86_avx512_psrav_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
: Intrinsic::x86_sse2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
Intrinsic::x86_avx512_psra_q_128;
else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
: Intrinsic::x86_sse2_psra_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
: Intrinsic::x86_avx2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
Intrinsic::x86_avx512_psra_q_256;
else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
: Intrinsic::x86_avx2_psra_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
Intrinsic::x86_avx512_psra_d_512;
else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
Intrinsic::x86_avx512_psra_q_512;
else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
: Intrinsic::x86_avx512_psra_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
Intrinsic::ID IID;
if (Name.endswith("ps.128"))
IID = Intrinsic::x86_avx_vpermilvar_ps;
else if (Name.endswith("pd.128"))
IID = Intrinsic::x86_avx_vpermilvar_pd;
else if (Name.endswith("ps.256"))
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
else if (Name.endswith("pd.256"))
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
else if (Name.endswith("ps.512"))
IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
else if (Name.endswith("pd.512"))
IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
else
llvm_unreachable("Unexpected vpermilvar intrinsic");
Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
Rep = Builder.CreateCall(Intrin,
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.endswith(".movntdqa")) {
Module *M = F->getParent();
MDNode *Node = MDNode::get(
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
Value *Ptr = CI->getArgOperand(0);
VectorType *VTy = cast<VectorType>(CI->getType());
// Convert the type of the pointer to a pointer to the stored type.
Value *BC =
Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
LI->setMetadata(M->getMDKindID("nontemporal"), Node);
Rep = LI;
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
Value *Arg = CI->getArgOperand(0);
Value *Neg = Builder.CreateNeg(Arg, "neg");
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
} else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
Name == "min.ui" || Name == "min.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
} else if (IsNVVM && Name == "clz.ll") {
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Ctlz = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
{Arg->getType()}),
{Arg, Builder.getFalse()}, "ctlz");
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
} else if (IsNVVM && Name == "popc.ll") {
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Popc = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
{Arg->getType()}),
Arg, "ctpop");
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
} else if (IsNVVM && Name == "h2f") {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
if (Rep)
CI->replaceAllUsesWith(Rep);
CI->eraseFromParent();
return;
}
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
default: {
// Handle generic mangling change, but nothing else
assert(
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
"Unknown function for CallInst upgrade and isn't just a name change");
CI->setCalledFunction(NewFn);
return;
}
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::bitreverse:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
"Mismatch between function args and call args");
NewCall =
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
break;
case Intrinsic::objectsize: {
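// The old two-argument form of llvm.objectsize had no 'null is unknown
// size' parameter; default it to false when upgrading.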
Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
? Builder.getFalse()
: CI->getArgOperand(2);
NewCall = Builder.CreateCall(
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
break;
}
case Intrinsic::ctpop:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::convert_from_fp16:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
break;
case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {
// The arguments for these intrinsics used to be v4f32, and changed
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
// So, the only thing required is a bitcast for both arguments.
// First, check the arguments have the old type.
Value *Arg0 = CI->getArgOperand(0);
if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
return;
// Old intrinsic, add bitcasts
Value *Arg1 = CI->getArgOperand(1);
Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
break;
}
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
case Intrinsic::x86_avx_dp_ps_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
}
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
}
assert(NewCall && "Should have either set this variable or returned through "
"the default case");
std::string Name = CI->getName();
if (!Name.empty()) {
CI->setName(Name + ".old");
NewCall->setName(Name);
}
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
// Check if this function should be upgraded and get the replacement function
// if there is one.
Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
if (CallInst *CI = dyn_cast<CallInst>(*UI++))
UpgradeIntrinsicCall(CI, NewFn);
// Remove old function, no longer used, from the module.
F->eraseFromParent();
}
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
// Check if the tag uses struct-path aware TBAA format.
if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
return &MD;
auto &Context = MD.getContext();
if (MD.getNumOperands() == 3) {
Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
Metadata *Elts2[] = {ScalarType, ScalarType,
ConstantAsMetadata::get(
Constant::getNullValue(Type::getInt64Ty(Context))),
MD.getOperand(2)};
return MDNode::get(Context, Elts2);
}
// Create a MDNode <MD, MD, offset 0>
Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
Type::getInt64Ty(Context)))};
return MDNode::get(Context, Elts);
}
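// Illustrative upgrade (metadata numbers are made up): the old scalar
// tag !0 = !{!"int", !1} becomes the access tag !{!0, !0, i64 0}, and the
// three-operand form !{!"int", !1, i64 1} becomes !{!2, !2, i64 0, i64 1}
// with !2 = !{!"int", !1}.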
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
Instruction *&Temp) {
if (Opc != Instruction::BitCast)
return nullptr;
Temp = nullptr;
Type *SrcTy = V->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = V->getContext();
// We have no information about the target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
}
return nullptr;
}
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
if (Opc != Instruction::BitCast)
return nullptr;
Type *SrcTy = C->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = C->getContext();
// We have no information about the target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
DestTy);
}
return nullptr;
}
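// Both helpers rewrite a bitcast between pointers in different address
// spaces, e.g.
//   %q = bitcast i8 addrspace(1)* %p to i8 addrspace(2)*
// into a ptrtoint/inttoptr pair through i64:
//   %t = ptrtoint i8 addrspace(1)* %p to i64
//   %q = inttoptr i64 %t to i8 addrspace(2)*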
/// Check the debug info version number; if it is out of date, drop the
/// debug info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
unsigned Version = getDebugMetadataVersionFromModule(M);
if (Version == DEBUG_METADATA_VERSION)
return false;
bool RetCode = StripDebugInfo(M);
if (RetCode) {
DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
M.getContext().diagnose(DiagVersion);
}
return RetCode;
}
bool llvm::UpgradeModuleFlags(Module &M) {
- const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
+ NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
return false;
- bool HasObjCFlag = false, HasClassProperties = false;
+ bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = ModFlags->getOperand(I);
- if (Op->getNumOperands() < 2)
+ if (Op->getNumOperands() != 3)
continue;
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
if (!ID)
continue;
if (ID->getString() == "Objective-C Image Info Version")
HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
HasClassProperties = true;
+ // Upgrade the PIC/PIE module flags. The module flag behavior for these
+ // two flags used to be Error and is now Max.
+ if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
+ if (auto *Behavior =
+ mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
+ if (Behavior->getLimitedValue() == Module::Error) {
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Metadata *Ops[3] = {
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
+ MDString::get(M.getContext(), ID->getString()),
+ Op->getOperand(2)};
+ ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
+ Changed = true;
+ }
+ }
+ }
}
+
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
// flag of value 0, so we can correclty downgrade this flag when trying to
// link an ObjC bitcode without this module flag with an ObjC bitcode with
// this module flag.
if (HasObjCFlag && !HasClassProperties) {
M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
- return true;
+ Changed = true;
}
- return false;
+
+ return Changed;
}
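// E.g. assuming the in-tree values Module::Error == 1 and Module::Max == 7,
// the flag !{i32 1, !"PIC Level", i32 2} is rewritten to
// !{i32 7, !"PIC Level", i32 2}.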
static bool isOldLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return false;
if (T->getNumOperands() < 1)
return false;
auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!S)
return false;
return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
StringRef OldPrefix = "llvm.vectorizer.";
assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
if (OldTag == "llvm.vectorizer.unroll")
return MDString::get(C, "llvm.loop.interleave.count");
return MDString::get(
C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
.str());
}
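// E.g. "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width", while
// "llvm.vectorizer.unroll" is special-cased to "llvm.loop.interleave.count".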
static Metadata *upgradeLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return MD;
if (T->getNumOperands() < 1)
return MD;
auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!OldTag)
return MD;
if (!OldTag->getString().startswith("llvm.vectorizer."))
return MD;
// This has an old tag. Upgrade it.
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
Ops.push_back(T->getOperand(I));
return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
auto *T = dyn_cast<MDTuple>(&N);
if (!T)
return &N;
if (none_of(T->operands(), isOldLoopArgument))
return &N;
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
for (Metadata *MD : T->operands())
Ops.push_back(upgradeLoopArgument(MD));
return MDTuple::get(T->getContext(), Ops);
}
Index: head/contrib/llvm/lib/Object/COFFModuleDefinition.cpp
===================================================================
--- head/contrib/llvm/lib/Object/COFFModuleDefinition.cpp (revision 322854)
+++ head/contrib/llvm/lib/Object/COFFModuleDefinition.cpp (revision 322855)
@@ -1,331 +1,337 @@
//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
//
// The format of module-definition files is described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
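// A minimal example (illustrative only):
//
//   LIBRARY foo.dll
//   EXPORTS
//     bar
//     baz = real_baz @1 NONAME DATA
//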
//===----------------------------------------------------------------------===//
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm::COFF;
using namespace llvm;
namespace llvm {
namespace object {
enum Kind {
Unknown,
Eof,
Identifier,
Comma,
Equal,
KwBase,
KwConstant,
KwData,
KwExports,
KwHeapsize,
KwLibrary,
KwName,
KwNoname,
KwPrivate,
KwStacksize,
KwVersion,
};
struct Token {
explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
Kind K;
StringRef Value;
};
static bool isDecorated(StringRef Sym, bool MingwDef) {
// mingw does not prepend "_".
return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") ||
Sym.startswith("?");
}
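// E.g. "_cdecl_sym", "@fastcall_sym@8" and "?cxx_mangled" all count as
// decorated; mingw .def files list cdecl symbols without the leading "_",
// so "_" is not treated as decoration there.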
static Error createError(const Twine &Err) {
return make_error<StringError>(StringRef(Err.str()),
object_error::parse_failed);
}
class Lexer {
public:
Lexer(StringRef S) : Buf(S) {}
Token lex() {
Buf = Buf.trim();
if (Buf.empty())
return Token(Eof);
switch (Buf[0]) {
case '\0':
return Token(Eof);
case ';': {
size_t End = Buf.find('\n');
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return lex();
}
case '=':
Buf = Buf.drop_front();
// GNU dlltool accepts both = and ==.
if (Buf.startswith("="))
Buf = Buf.drop_front();
return Token(Equal, "=");
case ',':
Buf = Buf.drop_front();
return Token(Comma, ",");
case '"': {
StringRef S;
std::tie(S, Buf) = Buf.substr(1).split('"');
return Token(Identifier, S);
}
default: {
size_t End = Buf.find_first_of("=,\r\n \t\v");
StringRef Word = Buf.substr(0, End);
Kind K = llvm::StringSwitch<Kind>(Word)
.Case("BASE", KwBase)
.Case("CONSTANT", KwConstant)
.Case("DATA", KwData)
.Case("EXPORTS", KwExports)
.Case("HEAPSIZE", KwHeapsize)
.Case("LIBRARY", KwLibrary)
.Case("NAME", KwName)
.Case("NONAME", KwNoname)
.Case("PRIVATE", KwPrivate)
.Case("STACKSIZE", KwStacksize)
.Case("VERSION", KwVersion)
.Default(Identifier);
Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
return Token(K, Word);
}
}
}
private:
StringRef Buf;
};
class Parser {
public:
explicit Parser(StringRef S, MachineTypes M, bool B)
: Lex(S), Machine(M), MingwDef(B) {}
Expected<COFFModuleDefinition> parse() {
do {
if (Error Err = parseOne())
return std::move(Err);
} while (Tok.K != Eof);
return Info;
}
private:
void read() {
if (Stack.empty()) {
Tok = Lex.lex();
return;
}
Tok = Stack.back();
Stack.pop_back();
}
Error readAsInt(uint64_t *I) {
read();
if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
return createError("integer expected");
return Error::success();
}
Error expect(Kind Expected, StringRef Msg) {
read();
if (Tok.K != Expected)
return createError(Msg);
return Error::success();
}
void unget() { Stack.push_back(Tok); }
Error parseOne() {
read();
switch (Tok.K) {
case Eof:
return Error::success();
case KwExports:
for (;;) {
read();
if (Tok.K != Identifier) {
unget();
return Error::success();
}
if (Error Err = parseExport())
return Err;
}
case KwHeapsize:
return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
case KwStacksize:
return parseNumbers(&Info.StackReserve, &Info.StackCommit);
case KwLibrary:
case KwName: {
bool IsDll = Tok.K == KwLibrary; // Check before parseName.
std::string Name;
if (Error Err = parseName(&Name, &Info.ImageBase))
return Err;
Info.ImportName = Name;
// Set the output file, but don't override /out if it was already passed.
if (Info.OutputFile.empty()) {
Info.OutputFile = Name;
// Append the appropriate file extension if not already present.
if (!sys::path::has_extension(Name))
Info.OutputFile += IsDll ? ".dll" : ".exe";
}
return Error::success();
}
case KwVersion:
return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
default:
return createError("unknown directive: " + Tok.Value);
}
}
Error parseExport() {
COFFShortExport E;
E.Name = Tok.Value;
read();
if (Tok.K == Equal) {
read();
if (Tok.K != Identifier)
return createError("identifier expected, but got " + Tok.Value);
E.ExtName = E.Name;
E.Name = Tok.Value;
} else {
unget();
}
if (Machine == IMAGE_FILE_MACHINE_I386) {
if (!isDecorated(E.Name, MingwDef))
E.Name = (std::string("_").append(E.Name));
if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
E.ExtName = (std::string("_").append(E.ExtName));
}
for (;;) {
read();
if (Tok.K == Identifier && Tok.Value[0] == '@') {
- Tok.Value.drop_front().getAsInteger(10, E.Ordinal);
+ if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
+ // Not an ordinal modifier at all, but the next export (fastcall
+ // decorated) - complete the current one.
+ unget();
+ Info.Exports.push_back(E);
+ return Error::success();
+ }
read();
if (Tok.K == KwNoname) {
E.Noname = true;
} else {
unget();
}
continue;
}
if (Tok.K == KwData) {
E.Data = true;
continue;
}
if (Tok.K == KwConstant) {
E.Constant = true;
continue;
}
if (Tok.K == KwPrivate) {
E.Private = true;
continue;
}
unget();
Info.Exports.push_back(E);
return Error::success();
}
}
// HEAPSIZE/STACKSIZE reserve[,commit]
Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
if (Error Err = readAsInt(Reserve))
return Err;
read();
if (Tok.K != Comma) {
unget();
Commit = nullptr;
return Error::success();
}
if (Error Err = readAsInt(Commit))
return Err;
return Error::success();
}
// NAME outputPath [BASE=address]
Error parseName(std::string *Out, uint64_t *Baseaddr) {
read();
if (Tok.K == Identifier) {
*Out = Tok.Value;
} else {
*Out = "";
unget();
return Error::success();
}
read();
if (Tok.K == KwBase) {
if (Error Err = expect(Equal, "'=' expected"))
return Err;
if (Error Err = readAsInt(Baseaddr))
return Err;
} else {
unget();
*Baseaddr = 0;
}
return Error::success();
}
// VERSION major[.minor]
Error parseVersion(uint32_t *Major, uint32_t *Minor) {
read();
if (Tok.K != Identifier)
return createError("identifier expected, but got " + Tok.Value);
StringRef V1, V2;
std::tie(V1, V2) = Tok.Value.split('.');
if (V1.getAsInteger(10, *Major))
return createError("integer expected, but got " + Tok.Value);
if (V2.empty())
*Minor = 0;
else if (V2.getAsInteger(10, *Minor))
return createError("integer expected, but got " + Tok.Value);
return Error::success();
}
Lexer Lex;
Token Tok;
std::vector<Token> Stack;
MachineTypes Machine;
COFFModuleDefinition Info;
bool MingwDef;
};
Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
MachineTypes Machine,
bool MingwDef) {
return Parser(MB.getBuffer(), Machine, MingwDef).parse();
}
} // namespace object
} // namespace llvm
Index: head/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 322854)
+++ head/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp (revision 322855)
@@ -1,14085 +1,14101 @@
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "ARMISelLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "arm-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
"Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
static cl::opt<bool> EnableConstpoolPromotion(
"arm-promote-constant", cl::Hidden,
cl::desc("Enable / disable promotion of unnamed_addr constants into "
"constant pools"),
cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
static cl::opt<unsigned> ConstpoolPromotionMaxSize(
"arm-promote-constant-max-size", cl::Hidden,
cl::desc("Maximum size of constant to promote into a constant pool"),
cl::init(64));
static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
"arm-promote-constant-max-total", cl::Hidden,
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
MVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
}
MVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
if (ElemTy == MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
} else {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
}
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
setOperationAction(ISD::OR, VT, Promote);
AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
setOperationAction(ISD::XOR, VT, Promote);
AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
}
// Neon does not support vector divide/remainder operations.
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPRRegClass);
addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &ARM::DPairRegClass);
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
const ARMSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
RegInfo = Subtarget->getRegisterInfo();
Itins = Subtarget->getInstrItineraryData();
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
!Subtarget->isTargetWatchOS()) {
const auto &E = Subtarget->getTargetTriple().getEnvironment();
bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
E == Triple::MuslEABIHF;
// Windows is a special case. Technically, we will replace all of the "GNU"
// calls with calls to MSVCRT if appropriate and adjust the calling
// convention then.
IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
IsHFTarget ? CallingConv::ARM_AAPCS_VFP
: CallingConv::ARM_AAPCS);
}
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const ISD::CondCode Cond;
} LibraryCalls[] = {
// Single-precision floating-point arithmetic.
{ RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
{ RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
// Double-precision floating-point arithmetic.
{ RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
{ RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
{ RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
{ RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
// Single-precision comparisons.
{ RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
{ RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
{ RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
{ RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
{ RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
{ RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
{ RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
{ RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
// Double-precision comparisons.
{ RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
{ RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
{ RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
{ RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
{ RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
{ RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
{ RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
{ RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
// Floating-point to integer conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
{ RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
// Conversions between floating types.
{ RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
// Integer to floating-point conversions.
// i64 conversions are done via library routines even when generating VFP
// instructions, so use the same ones.
// FIXME: There appears to be some naming inconsistency in ARM libgcc:
// e.g., __floatunsidf vs. __floatunssidfvfp.
{ RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
}
// Set the correct calling convention for ARMv7k WatchOS: even functions as
// simple as libcalls use AAPCS_VFP.
if (Subtarget->isTargetWatchABI()) {
for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
}
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
// RTABI run-time helper libcalls (AEABI targets).
if (Subtarget->isAAPCS_ABI() &&
(Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
const ISD::CondCode Cond;
} LibraryCalls[] = {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
{ RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Double-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 3
{ RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
{ RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
{ RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Single-precision floating-point comparison helper functions
// RTABI chapter 4.1.2, Table 5
{ RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
{ RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
{ RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Conversions between floating types.
// RTABI chapter 4.1.2, Table 7
{ RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer to floating-point conversions.
// RTABI chapter 4.1.2, Table 8
{ RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Long long helper functions
// RTABI chapter 4.2, Table 9
{ RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Integer division functions
// RTABI chapter 4.3.1
{ RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
// EABI dependent RTLIB
if (TM.Options.EABIVersion == EABI::EABI4 ||
TM.Options.EABIVersion == EABI::EABI5) {
static const struct {
const RTLIB::Libcall Op;
const char *const Name;
const CallingConv::ID CC;
const ISD::CondCode Cond;
} MemOpsLibraryCalls[] = {
// Memory operations
// RTABI chapter 4.3.4
{ RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : MemOpsLibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
}
}
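// With the tables above, an ordered double-precision compare such as
// (a < b) is lowered roughly as
//   int __aeabi_dcmplt(double a, double b);  // returns 1 if a < b, else 0
//   ... setcc ne (__aeabi_dcmplt(a, b)), 0 ...
// i.e. the SETNE entry tells the legalizer how to interpret the libcall's
// integer result.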
if (Subtarget->isTargetWindows()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
if (Subtarget->isTargetMachO() &&
!(Subtarget->isTargetIOS() &&
Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
// The half <-> float conversion functions are always soft-float on
// non-watchOS platforms, but are needed for some targets which use a
// hard-float calling convention by default.
if (!Subtarget->isTargetWatchABI()) {
if (Subtarget->isAAPCS_ABI()) {
setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
} else {
setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
}
}
// In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
// a __gnu_ prefix (which is the default).
if (Subtarget->isTargetAEABI()) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
{ RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
{ RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
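// For example, a float-to-half truncation on an AEABI target becomes a
// call to
//   unsigned short __aeabi_f2h(float);   // RTABI half-precision helper
// whereas other targets keep the default __gnu_f2h_ieee name set by RTLIB.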
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
for (MVT VT : MVT::vector_valuetypes()) {
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither NEON nor VFP support any arithmetic operations on it. Many of the
// same operations are expanded for v4f32 below as well, though keep in mind
// that vadd, vsub and vmul are natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
// FIXME: Code duplication: FDIV and FREM are expanded always, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
// FIXME: Create unittest.
// In other words, find a case where "copysign" appears in the DAG with
// vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
// FIXME: Code duplication: SETCC has custom operation action, see
// ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
// FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
// FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
setOperationAction(ISD::FMA, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Expand the v2f32 forms of these intrinsics as well.
setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Custom handling for some vector types to avoid expensive expansions
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
// NEON does not have single-instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
// source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// NEON does not have single instruction CTTZ for vectors.
setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
// NEON only has FMA instructions as of VFP4.
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
MVT::v2i32}) {
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
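// For instance, a v4i8 -> v4i32 extending load can be selected as a narrow
// load plus widening moves, roughly:
//   vld1.32   {d0[0]}, [r0]   ; load four i8 lanes
//   vmovl.u8  q0, d0          ; widen i8  -> i16
//   vmovl.u16 q0, d0          ; widen i16 -> i32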
}
if (Subtarget->isFPOnlySP()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present. However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FDIV, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
setOperationAction(ISD::FNEG, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
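// On such an FPU (e.g. the FPv4-SP found on Cortex-M4F cores), an f64 add
// therefore ends up as a soft-float libcall, roughly
//   double __aeabi_dadd(double, double);   // used instead of vadd.f64
// on AEABI targets, while f64 loads, stores and moves still use the
// hardware.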
computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
}
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
// ARM does not have an i1 sign-extending load.
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// ARM supports all 4 flavors of integer indexed load / store.
if (!Subtarget->isThumb1Only()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i1, Legal);
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i1, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
}
} else {
// Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
}
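// These indexed modes correspond to ARM addressing forms such as
//   ldr r0, [r1, #4]!   ; pre-indexed:  r1 += 4, then load
//   ldr r0, [r1], #4    ; post-indexed: load, then r1 += 4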
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
|| (Subtarget->isThumb2() && !Subtarget->hasDSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
// ARM does not have ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
// @llvm.readcyclecounter requires the Performance Monitors extension.
// Default to the 0 expansion on unsupported platforms.
// FIXME: Technically there are older ARM CPUs that have
// implementation-specific ways of obtaining this information.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (!hasDivide) {
// These are expanded into libcalls if the CPU doesn't have a hardware divider.
setOperationAction(ISD::SDIV, MVT::i32, LibCall);
setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::SDIV, MVT::i64, Custom);
setOperationAction(ISD::UDIV, MVT::i64, Custom);
}
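// The Windows paths are Custom rather than LibCall because the lowering
// must also emit an explicit divide-by-zero check (ARMISD::WIN__DBZCHK)
// before entering the runtime, conceptually:
//   if (divisor == 0) trap;                        // WIN__DBZCHK
//   {quot, rem} = __rt_sdiv(divisor, dividend);    // divisor passed first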
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
Subtarget->isTargetWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
if (Subtarget->isTargetWindows()) {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
} else {
const struct {
const RTLIB::Libcall Op;
const char * const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
{ RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
{ RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
{ RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
};
for (const auto &LC : LibraryCalls) {
setLibcallName(LC.Op, LC.Name);
setLibcallCallingConv(LC.Op, LC.CC);
}
}
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
} else {
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
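// For example, on AEABI targets the pair
//   int q = a / b;
//   int r = a % b;
// becomes a single call to __aeabi_idivmod, which returns the quotient in
// r0 and the remainder in r1 (RTABI 4.3.1).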
if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
for (auto &VT : {MVT::f32, MVT::f64})
setOperationAction(ISD::FPOWI, VT, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
InsertFencesForAtomic = false;
if (Subtarget->hasAnyDataBarrier() &&
(!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
// ATOMIC_FENCE needs custom lowering; the others should have been expanded
// to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
if (!Subtarget->isThumb() || !Subtarget->isMClass())
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
InsertFencesForAtomic = true;
}
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
// If the target has DMB in Thumb mode, fences can be inserted.
if (Subtarget->hasDataBarrier())
InsertFencesForAtomic = true;
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
if (!InsertFencesForAtomic) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
}
}
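// With fence insertion enabled, a seq_cst atomic store is bracketed by
// barriers, roughly:
//   dmb ish          ; fence before the store
//   str r1, [r0]
//   dmb ish          ; fence after the store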
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
// Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
// iff the target supports VFP2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (Subtarget->useSjLjEH())
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// Thumb-1 cannot currently select ARMISD::SUBE.
if (!Subtarget->isThumb1Only())
setOperationAction(ISD::SETCCE, MVT::i32, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
// We don't support sin/cos/fmod/copysign/pow
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
if (!Subtarget->hasVFP4()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
}
// Various VFP goodness
if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
// FP-ARMv8 adds f64 <-> f16 conversion. On anything earlier it must be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
}
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget->isTargetWatchABI()) {
setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
}
if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
// For iOS, we don't want the normal expansion of a libcall to sincos;
// we want to issue a libcall to __sincos_stret instead.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
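// This computes both results with one call, conceptually
//   { double sin, cos } = __sincos_stret(x);
// rather than separate calls to sin() and cos(); on ARM the pair is
// returned indirectly via an sret pointer.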
// FP-ARMv8 implements a lot of rounding-like FP operations.
if (Subtarget->hasFPARMv8()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
if (!Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we use
// a NEON instruction with an undef lane instead.
setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
}
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
setStackPointerRegisterToSaveRestore(ARM::SP);
if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
!Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross-class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass *, uint8_t>
ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
// Use DPR as the representative register class for all floating-point and
// vector types. Since there are 32 SPR registers and 32 DPR registers, the
// cost is 1 for both f32 and f64.
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
RRC = &ARM::DPRRegClass;
// When NEON is used for SP, only half of the register file is available
// because operations that define both SP and DP results will be constrained
// to the VFP2 class (D0-D15). We currently model this constraint prior to
// coalescing by double-counting the SP regs. See the FIXME above.
if (Subtarget->useNEONForSinglePrecisionFP())
Cost = 2;
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
RRC = &ARM::DPRRegClass;
Cost = 2;
break;
case MVT::v4i64:
RRC = &ARM::DPRRegClass;
Cost = 4;
break;
case MVT::v8i64:
RRC = &ARM::DPRRegClass;
Cost = 8;
break;
}
return std::make_pair(RRC, Cost);
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((ARMISD::NodeType)Opcode) {
case ARMISD::FIRST_NUMBER: break;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
case ARMISD::ADDC: return "ARMISD::ADDC";
case ARMISD::ADDE: return "ARMISD::ADDE";
case ARMISD::SUBC: return "ARMISD::SUBC";
case ARMISD::SUBE: return "ARMISD::SUBE";
case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
case ARMISD::VCGT: return "ARMISD::VCGT";
case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
case ARMISD::VSHL: return "ARMISD::VSHL";
case ARMISD::VSHRs: return "ARMISD::VSHRs";
case ARMISD::VSHRu: return "ARMISD::VSHRu";
case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
case ARMISD::VSLI: return "ARMISD::VSLI";
case ARMISD::VSRI: return "ARMISD::VSRI";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
case ARMISD::VREV64: return "ARMISD::VREV64";
case ARMISD::VREV32: return "ARMISD::VREV32";
case ARMISD::VREV16: return "ARMISD::VREV16";
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
case ARMISD::VTBL1: return "ARMISD::VTBL1";
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
case ARMISD::SMULWB: return "ARMISD::SMULWB";
case ARMISD::SMULWT: return "ARMISD::SMULWT";
case ARMISD::SMLALD: return "ARMISD::SMLALD";
case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
}
return nullptr;
}
EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
return VT.changeVectorElementTypeToInteger();
}
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
return &ARM::QQPRRegClass;
if (VT == MVT::v8i64)
return &ARM::QQQQPRRegClass;
}
return TargetLowering::getRegClassFor(VT);
}
// memcpy and other memory intrinsics typically try to use LDM/STM if the
// source/dest is aligned and the copy size is large enough. We therefore want
// to align such objects passed to memory intrinsics.
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
unsigned &PrefAlign) const {
if (!isa<MemIntrinsic>(CI))
return false;
MinSize = 8;
// On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
// cycle faster than 4-byte aligned LDM.
PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
return true;
}
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return ARM::createFastISel(funcInfo, libInfo);
}
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
return Sched::RegPressure;
for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
return Sched::ILP;
}
if (!N->isMachineOpcode())
return Sched::RegPressure;
// Loads are scheduled for latency even if the instruction itinerary
// is not available.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
return Sched::ILP;
return Sched::RegPressure;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
static bool isSRL16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SRL)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
static bool isSRA16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SRA)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
static bool isSHL16(const SDValue &Op) {
if (Op.getOpcode() != ISD::SHL)
return false;
if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return Const->getZExtValue() == 16;
return false;
}
// Check for a signed 16-bit value. We special-case SRA because doing so
// keeps things simpler when also looking for SRAs that aren't sign-extending
// a smaller value. Without the check, we'd need to take extra care with
// checking order for some operations.
static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
if (isSRA16(Op))
return isSHL16(Op.getOperand(0));
return DAG.ComputeNumSignBits(Op) == 17;
}
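// e.g. in isS16 above: (sra (shl x, 16), 16) explicitly sign-extends a
// 16-bit value, and an i32 with ComputeNumSignBits == 17 has its top 17
// bits equal to the sign bit, i.e. it also fits in 16 bits. Both forms can
// feed the signed 16-bit multiply patterns (e.g. ARMISD::SMLALBB) formed
// later.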
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown condition code!");
case ISD::SETNE: return ARMCC::NE;
case ISD::SETEQ: return ARMCC::EQ;
case ISD::SETGT: return ARMCC::GT;
case ISD::SETGE: return ARMCC::GE;
case ISD::SETLT: return ARMCC::LT;
case ISD::SETLE: return ARMCC::LE;
case ISD::SETUGT: return ARMCC::HI;
case ISD::SETUGE: return ARMCC::HS;
case ISD::SETULT: return ARMCC::LO;
case ISD::SETULE: return ARMCC::LS;
}
}
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
CondCode2 = ARMCC::AL;
InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = ARMCC::EQ;
InvalidOnQNaN = false;
break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
case ISD::SETONE:
CondCode = ARMCC::MI;
CondCode2 = ARMCC::GT;
InvalidOnQNaN = false;
break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
case ISD::SETUEQ:
CondCode = ARMCC::EQ;
CondCode2 = ARMCC::VS;
InvalidOnQNaN = false;
break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
case ISD::SETULT: CondCode = ARMCC::LT; break;
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = ARMCC::NE;
InvalidOnQNaN = false;
break;
}
}
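// For example, SETONE (ordered and not equal) has no single ARM condition,
// so it is tested as two branches on the VFP flags:
//   vcmp.f64 d0, d1
//   vmrs     APSR_nzcv, fpscr
//   bmi      taken            ; ordered and a < b
//   bgt      taken            ; ordered and a > b
// An unordered compare clears N and sets V, so neither MI nor GT is taken.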
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "ARMGenCallingConv.inc"
/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_AAPCS:
case CallingConv::ARM_APCS:
case CallingConv::GHC:
return CC;
case CallingConv::PreserveMost:
return CallingConv::PreserveMost;
case CallingConv::ARM_AAPCS_VFP:
case CallingConv::Swift:
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
} else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
}
}
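// For example, a variadic callee on a hard-float AAPCS target still gets
// the base convention from getEffectiveCallingConv: AAPCS_VFP would pass
// floats in s0-s15, which va_arg cannot retrieve, so isVarArg forces
// ARM_AAPCS above.
//   double sum(int n, ...);   // effective CC: ARM_AAPCS, not AAPCS_VFP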
CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool isVarArg) const {
return CCAssignFnForNode(CC, false, isVarArg);
}
CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool isVarArg) const {
return CCAssignFnForNode(CC, true, isVarArg);
}
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
bool Return,
bool isVarArg) const {
switch (getEffectiveCallingConv(CC, isVarArg)) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_AAPCS_VFP:
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
case CallingConv::Fast:
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
case CallingConv::PreserveMost:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
SDValue Val;
if (VA.needsCustom()) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(0, dl, MVT::i32));
VA = RVLocs[++i]; // skip ahead to next loc
Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, dl, MVT::i32));
}
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
}
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
break;
}
InVals.push_back(Val);
}
return Chain;
}
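// For illustration (an assumed soft-float example): a call returning f64
// yields two i32 RVLocs, typically r0/r1; the loop above copies both halves
// out of their physregs and rebuilds the f64 with ARMISD::VMOVDRR, swapping
// the halves first on big-endian subtargets.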
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
SDValue Arg, const SDLoc &dl,
SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
CCValAssign &VA, CCValAssign &NextVA,
SDValue &StackPtr,
SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const {
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
unsigned id = Subtarget->isLittle() ? 0 : 1;
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
if (NextVA.isRegLoc())
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
else {
assert(NextVA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
Flags));
}
}
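// For example (an assumed register assignment): an f64 argument landing in
// r2/r3 is split by ARMISD::VMOVRRD; on little-endian targets the low word
// goes to VA's register (r2) and the high word to NextVA's (r3). If only one
// register remains, the other half instead becomes a stack store via
// LowerMemOpCallTo.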
/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool doesNotRet = CLI.DoesNotReturn;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
isSibCall = true;
}
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
if (isSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
// f64 and v2f64 might be passed in i32 pairs and must be split into pieces
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, dl, MVT::i32));
SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, dl, MVT::i32));
PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
VA = ArgLocs[++i]; // skip ahead to next loc
if (VA.isRegLoc()) {
PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
} else {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
dl, DAG, VA, Flags));
}
} else {
PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i32) {
assert(VA.getLocVT() == MVT::i32 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
"unexpected use of 'returned'");
isThisReturn = true;
}
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
unsigned offset = 0;
// True if this byval aggregate will be split between registers
// and memory.
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
if (CurByValIdx < ByValArgsCount) {
unsigned RegBegin, RegEnd;
CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
EVT PtrVT =
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned int i, j;
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
// If the parameter size exceeds the register area, the "offset" value
// helps us compute the stack slot for the remaining part properly.
offset = RegEnd - RegBegin;
CCInfo.nextInRegsParam();
}
if (Flags.getByValSize() > 4*offset) {
auto PtrVT = getPointerTy(DAG.getDataLayout());
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops));
}
} else if (!isSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
if (!isTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
// Do not glue the preceding CopyToReg nodes together with the following ones.
InFlag = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool isDirect = false;
const TargetMachine &TM = getTargetMachine();
const Module *Mod = MF.getFunction()->getParent();
const GlobalValue *GV = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
GV = G->getGlobal();
bool isStub =
!TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
bool isLocalARMFunc = false;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
auto PtrVt = getPointerTy(DAG.getDataLayout());
if (Subtarget->genLongCalls()) {
assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
"long-calls codegen is not position independent!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
if (isa<GlobalAddressSDNode>(Callee)) {
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
// Create a constant pool entry for the callee address
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 0);
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
} else if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
auto *BB = CLI.CS->getParent();
bool PreferIndirect =
Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
if (!PreferIndirect) {
isDirect = true;
bool isDef = GV->isStrongDefinitionForLinker();
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
/* Alignment = */ 0, MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
TargetFlags);
if (GV->hasDLLImportStorageClass())
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
} else {
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
}
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(
PtrVt, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
}
}
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = ARMISD::CALL;
} else {
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
!MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
if (!isTailCall) {
const uint32_t *Mask;
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
}
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
MF.getFrameInfo().setHasTailCall();
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals, isThisReturn,
isThisReturn ? OutVals[0] : SDValue());
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
// Byval (as with any stack) slots are always at least 4 byte aligned.
Align = std::max(Align, 4U);
unsigned Reg = State->AllocateReg(GPRArgRegs);
if (!Reg)
return;
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
for (unsigned i = 0; i < Waste; ++i)
Reg = State->AllocateReg(GPRArgRegs);
if (!Reg)
return;
unsigned Excess = 4 * (ARM::R4 - Reg);
// Special case when the NSAA (AAPCS Next Stacked Argument Address) != SP and
// the parameter size is greater than the size of all the remaining GPRs. In
// that case we can't split the parameter; we must send it to the stack. We
// must also set the NCRN (Next Core Register Number) to R4, wasting all of
// the remaining registers.
const unsigned NSAAOffset = State->getNextStackOffset();
if (NSAAOffset != 0 && Size > Excess) {
while (State->AllocateReg(GPRArgRegs))
;
return;
}
// The first register for the byval parameter is the first register that
// wasn't allocated before this method call, i.e. "reg".
// If the parameter is small enough to fit in the range [reg, r4), the end
// (one past the last) register is reg + param-size-in-regs; otherwise the
// parameter is split between registers and the stack, and the end register
// is r4 in that case.
unsigned ByValRegBegin = Reg;
unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
// Note that the first register was already allocated at the beginning of
// this method, so allocate only the remaining registers we need.
for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
State->AllocateReg(GPRArgRegs);
// A byval parameter that is split between registers and memory needs its
// size truncated here.
// In the case where the entire structure fits in registers, we set the
// size in memory to zero.
Size = std::max<int>(Size - Excess, 0);
}
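// Worked example (assumed values): a 12-byte byval with 8-byte alignment,
// with r0 already taken. AllocateReg returns r1; AlignInRegs == 2, so r1 is
// wasted and Reg becomes r2. Excess == 8, so r2/r3 hold the first 8 bytes,
// ByValRegEnd == r4, and Size is truncated to the 4 bytes that spill to the
// stack.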
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else
return false;
assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
assert(Subtarget->supportsTailCall());
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
if (CallerF->hasFnAttribute("interrupt"))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Check that the call results are passed in the same way.
LLVMContext &C = *DAG.getContext();
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
CCAssignFnForReturn(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
if (AFI_Caller->getArgRegsSaveSize())
return false;
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (VA.needsCustom()) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
// out what to do about this.
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
}
} else if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
}
return true;
}
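// Illustrative eligible case (a simplified assumption): caller and callee
// share a calling convention, nothing is passed on the stack, and neither
// side uses sret:
//   int callee(int);
//   int caller(int x) { return callee(x + 1); } // emitted as "b callee"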
bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
const SDLoc &DL, SelectionDAG &DAG) {
const MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
// See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
// version of the "preferred return address". These offsets affect the return
// instruction if this is a return from PL1 without hypervisor extensions.
// IRQ/FIQ: +4 "subs pc, lr, #4"
// SWI: 0 "subs pc, lr, #0"
// ABORT: +4 "subs pc, lr, #4"
// UNDEF: +4/+2 "subs pc, lr, #0"
// UNDEF varies depending on whether the exception came from ARM or Thumb
// mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
int64_t LROffset;
if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
IntKind == "ABORT")
LROffset = 4;
else if (IntKind == "SWI" || IntKind == "UNDEF")
LROffset = 0;
else
report_fatal_error("Unsupported interrupt attribute. If present, value "
"must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
RetOps.insert(RetOps.begin() + 1,
DAG.getConstant(LROffset, DL, MVT::i32, false));
return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}
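// E.g. (an assumed use): a handler marked __attribute__((interrupt("IRQ")))
// returns with "subs pc, lr, #4", restoring PC and CPSR in a single
// instruction, per the offset table above.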
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
bool isLittleEndian = Subtarget->isLittle();
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
AFI->setReturnRegsCount(RVLocs.size());
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
if (VA.needsCustom()) {
if (VA.getLocVT() == MVT::v2f64) {
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, dl, MVT::i32));
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(isLittleEndian ? 0 : 1),
Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(isLittleEndian ? 1 : 0),
Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(1, dl, MVT::i32));
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
fmrrd.getValue(isLittleEndian ? 0 : 1),
Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
fmrrd.getValue(isLittleEndian ? 1 : 0),
Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
// Glue all emitted copies together so that nothing can be
// scheduled in between them.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (ARM::GPRRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i32));
else if (ARM::DPRRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
// Update chain and glue.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
// CPUs which aren't M-class use a special sequence to return from
// exceptions (roughly, any instruction setting pc and cpsr simultaneously,
// though we use "subs pc, lr, #N").
//
// M-class CPUs actually use a normal return sequence with a special
// (hardware-provided) value in LR, so the normal code path works.
if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
!Subtarget->isMClass()) {
if (Subtarget->isThumb1Only())
report_fatal_error("interrupt attribute is not supported in Thumb1");
return LowerInterruptReturn(RetOps, dl, DAG);
}
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
SmallPtrSet<SDNode*, 2> Copies;
for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
Copies.insert(*UI);
}
if (Copies.size() > 2)
return false;
for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
SDValue UseChain = UI->getOperand(0);
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
Copy = *UI;
else {
// We are at the top of this chain.
// If the copy has a glue operand, we conservatively assume it
// isn't safe to perform a tail call.
if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
// First CopyToReg
TCChain = UseChain;
}
}
} else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
if (!Copy->hasOneUse())
return false;
Copy = *Copy->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else {
return false;
}
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ARMISD::RET_FLAG &&
UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!Subtarget->supportsTailCall())
return false;
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
return true;
}
// Writing a 64-bit value requires splitting it into two 32-bit values first,
// then passing the low and high parts through.
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue WriteValue = Op->getOperand(2);
// This function is only supposed to be called for i64 type argument.
assert(WriteValue.getValueType() == MVT::i64
&& "LowerWRITE_REGISTER called for non-i64 type argument.");
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
DAG.getConstant(1, DL, MVT::i32));
SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
}
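// Sketch of the effect (assumed intrinsic use): an llvm.write_register call
// with an i64 operand becomes a single ISD::WRITE_REGISTER node whose
// operands carry the low and high i32 halves.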
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
// When generating execute-only code Constant Pools must be promoted to the
// global data section. It's a bit ugly that we can't share them across basic
// blocks, but this way we guarantee that execute-only behaves correctly with
// position-independent addressing modes.
if (Subtarget->genExecuteOnly()) {
auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
auto T = const_cast<Type*>(CP->getType());
auto C = const_cast<Constant*>(CP->getConstVal());
auto M = const_cast<Module*>(DAG.getMachineFunction().
getFunction()->getParent());
auto GV = new GlobalVariable(
*M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
Twine(AFI->createPICLabelUId())
);
SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
dl, PtrVT);
return LowerGlobalAddress(GA, DAG);
}
if (CP->isMachineConstantPoolEntry())
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
CP->getAlignment());
else
Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
CP->getAlignment());
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
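// Under execute-only the net effect (a sketch) is that a constant which
// would have gone into a per-function constant pool instead becomes an
// internal global named "<private-prefix>CP<function-number>_<uid>" in the
// data section, and is then addressed through LowerGlobalAddress.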
unsigned ARMTargetLowering::getJumpTableEncoding() const {
return MachineJumpTableInfo::EK_Inline;
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = 0;
SDLoc DL(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue CPAddr;
bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
if (!IsPositionIndependent) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, DL, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
if (!IsPositionIndependent)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
/// \brief Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address for Darwin, and return an
/// SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i32] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first word, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "r0".
///
/// Since this descriptor may be in a different unit, in general access must
/// proceed along the usual ARM rules. A common sequence to produce is:
///
/// movw rT1, :lower16:_var$non_lazy_ptr
/// movt rT1, :upper16:_var$non_lazy_ptr
/// ldr r0, [rT1]
/// ldr rT2, [r0]
/// blx rT2
/// [...address now in r0...]
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
SDLoc DL(Op);
// The first step is to get the address of the actual global symbol. This is
// where the TLS descriptor lives.
SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
MVT::i32, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
/* Alignment = */ 4,
MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
MachineFunction &F = DAG.getMachineFunction();
MachineFrameInfo &MFI = F.getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
// silly).
auto TRI =
getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
// Finally, we can make the call. This is just a degenerate version of a
// normal ARM call node: r0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
Chain =
DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
SDValue
ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
// Load the current TEB (thread environment block)
SDValue Ops[] = {Chain,
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getConstant(15, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(13, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(2, DL, MVT::i32)};
SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
SDValue TEB = CurrentTEB.getValue(0);
Chain = CurrentTEB.getValue(1);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x2c from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
// The pointer to this thread's TLS data area lives at the TLS index,
// scaled by 4, as an offset into the TLS array.
// Load the TLS index from the C runtime
SDValue TLSIndex =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(2, DL, MVT::i32));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
// Get the offset of the start of the .tls section (section base)
const auto *GA = cast<GlobalAddressSDNode>(Op);
auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
SDValue Offset = DAG.getLoad(
PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
DAG.getTargetConstantPool(CPV, PtrVT, 4)),
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
}
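// Rough instruction sketch of the sequence above (assumed register names):
//   mrc p15, #0, r0, c13, c0, #2   ; TEB
//   ldr r1, [r0, #0x2c]            ; ThreadLocalStoragePointer
//   ldr r2, =_tls_index
//   ldr r2, [r2]
//   ldr r1, [r1, r2, lsl #2]       ; this module's TLS block
//   add r0, r1, #sect_offset       ; SECREL offset of the variable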
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
SDLoc dl(GA);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), Argument,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
// FIXME: is there useful debug info available here?
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
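// The emitted sequence is roughly (a sketch, assuming Thumb with PCAdj 4):
//   ldr  r0, .LCPI          ; TLSGD constant pool entry
// .LPC:
//   add  r0, pc             ; ARMISD::PIC_ADD
//   bl   __tls_get_addr     ; variable's address returned in r0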
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const {
const GlobalValue *GV = GA->getGlobal();
SDLoc dl(GA);
SDValue Offset;
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
if (model == TLSModel::InitialExec) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
// Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
} else {
// local exec model
assert(model == TLSModel::LocalExec);
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(
PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
// The address of the thread-local variable is the sum of the thread
// pointer and the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
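// Net effect (a sketch): for local exec the variable's TPOFF is loaded from
// the constant pool and added to the thread pointer directly; initial exec
// inserts an extra PIC add plus one more load to fetch the TP-relative
// offset through the GOT before the same final add.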
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerGlobalTLSAddressDarwin(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerGlobalTLSAddressWindows(Op, DAG);
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic:
return LowerToTLSGeneralDynamicModel(GA, DAG);
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModels(GA, DAG, model);
}
llvm_unreachable("bogus TLS model");
}
/// Return true if all users of V are within function F, looking through
/// ConstantExprs.
static bool allUsersAreInFunction(const Value *V, const Function *F) {
SmallVector<const User*,4> Worklist;
for (auto *U : V->users())
Worklist.push_back(U);
while (!Worklist.empty()) {
auto *U = Worklist.pop_back_val();
if (isa<ConstantExpr>(U)) {
for (auto *UU : U->users())
Worklist.push_back(UU);
continue;
}
auto *I = dyn_cast<Instruction>(U);
if (!I || I->getParent()->getParent() != F)
return false;
}
return true;
}
/// Return true if all users of V are within some (any) function, looking
/// through ConstantExprs. In other words, return false if V has any global
/// constant users.
static bool allUsersAreInFunctions(const Value *V) {
SmallVector<const User*,4> Worklist;
for (auto *U : V->users())
Worklist.push_back(U);
while (!Worklist.empty()) {
auto *U = Worklist.pop_back_val();
if (isa<ConstantExpr>(U)) {
for (auto *UU : U->users())
Worklist.push_back(UU);
continue;
}
if (!isa<Instruction>(U))
return false;
}
return true;
}
// Return true if T is an integer, float or an array/vector of either.
static bool isSimpleType(Type *T) {
if (T->isIntegerTy() || T->isFloatingPointTy())
return true;
Type *SubT = nullptr;
if (T->isArrayTy())
SubT = T->getArrayElementType();
else if (T->isVectorTy())
SubT = T->getVectorElementType();
else
return false;
return SubT->isIntegerTy() || SubT->isFloatingPointTy();
}
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
// to save ourselves an indirection.
//
// This is a win if the constant is only used in one function (so it doesn't
// need to be duplicated) or duplicating the constant wouldn't increase code
// size (implying the constant is no larger than 4 bytes).
const Function *F = DAG.getMachineFunction().getFunction();
// We rely on this decision to inline being idempotent and unrelated to the
// use-site. We know that if we inline a variable at one use site, we'll
// inline it elsewhere too (and reuse the constant pool entry). Fast-isel
// doesn't know about this optimization, so bail out if it's enabled;
// otherwise we could decide to inline here (and thus never emit the GV) but
// require the GV from fast-isel generated code.
if (!EnableConstpoolPromotion ||
DAG.getMachineFunction().getTarget().Options.EnableFastISel)
return SDValue();
auto *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar || !GVar->hasInitializer() ||
!GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
!GVar->hasLocalLinkage())
return SDValue();
// Ensure that we don't try and inline any type that contains pointers. If
// we inline a value that contains relocations, we move the relocations from
// .data to .text which is not ideal.
auto *Init = GVar->getInitializer();
if (!isSimpleType(Init->getType()))
return SDValue();
// The constant islands pass can only really deal with alignment requests
// <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
// any type with an alignment requirement greater than 4 bytes. We also
// can only promote constants that are multiples of 4 bytes in size or
// are paddable to a multiple of 4. Currently we only try to pad constants
// that are strings, for simplicity.
auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
unsigned Align = GVar->getAlignment();
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
Size == 0)
return SDValue();
unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't bloat the constant pool too much, else the ConstantIslands pass
// may fail to converge. If we haven't promoted this global yet (it may have
// multiple uses), and promoting it would increase the constant pool size (Sz
// > 4), ensure we have space to do so up to MaxTotal.
if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
ConstpoolPromotionMaxTotal)
return SDValue();
// This is only valid if all users are in a single function, OR it has users
// in multiple functions but is no larger than a pointer. We also check if
// GVar has constant (non-ConstantExpr) users. If so, it essentially has its
// address taken.
if (!allUsersAreInFunction(GVar, F) &&
!(Size <= 4 && allUsersAreInFunctions(GVar)))
return SDValue();
// We're going to inline this global. Pad it out if needed.
if (RequiredPadding != 4) {
StringRef S = CDAInit->getAsString();
SmallVector<uint8_t,16> V(S.size());
std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
while (RequiredPadding--)
V.push_back(0);
Init = ConstantDataArray::get(*DAG.getContext(), V);
}
auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
SDValue CPAddr =
DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
AFI->markGlobalAsPromotedToConstantPool(GVar);
AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
PaddedSize - 4);
}
++NumConstpoolPromoted;
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
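// Worked example (assumed sizes): a 6-byte internal constant string with
// alignment <= 4 gets RequiredPadding == 2, is padded to 8 bytes with
// trailing zeros, and is emitted inline into the constant pool, so its
// users need no indirection through a global address.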
static bool isReadOnly(const GlobalValue *GV) {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->getBaseObject();
return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
isa<Function>(GV);
}
SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
switch (Subtarget->getTargetTriple().getObjectFormat()) {
default: llvm_unreachable("unknown object format");
case Triple::COFF:
return LowerGlobalAddressWindows(Op, DAG);
case Triple::ELF:
return LowerGlobalAddressELF(Op, DAG);
case Triple::MachO:
return LowerGlobalAddressDarwin(Op, DAG);
}
}
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const TargetMachine &TM = getTargetMachine();
bool IsRO = isReadOnly(GV);
// Try promoteToConstantPool only when not generating an execute-only (XO)
// text section.
if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
return V;
if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
/*AddCurrentAddress=*/UseGOT_PREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
SDValue Chain = Result.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
if (UseGOT_PREL)
Result =
DAG.getLoad(PtrVT, dl, Chain, Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
} else if (Subtarget->isROPI() && IsRO) {
// PC-relative.
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
return Result;
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
SDValue RelAddr;
if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
} else { // use literal pool for address constant
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
RelAddr = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
return Result;
}
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
}
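// With movw/movt available, the non-PIC path above typically selects to
// (a sketch):
//   movw r0, :lower16:sym
//   movt r0, :upper16:sym
// whereas the literal-pool fallback loads the address with a pc-relative ldr.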
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Darwin");
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into multiple nodes
unsigned Wrapper =
isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
if (Subtarget->isGVIndirectSymbol(GV))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
"Windows on ARM expects to use movw/movt");
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const ARMII::TOF TargetFlags =
(GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
SDLoc DL(Op);
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Val = DAG.getConstant(0, dl, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
Op.getOperand(1), Val);
}
SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}
SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
Op.getOperand(0));
}
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue CPAddr;
bool IsPositionIndependent = isPositionIndependent();
unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(
PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
if (IsPositionIndependent) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
}
case Intrinsic::arm_neon_vabs:
return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
? ARMISD::VMULLs : ARMISD::VMULLu;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vminnm:
case Intrinsic::arm_neon_vmaxnm: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
? ISD::FMINNUM : ISD::FMAXNUM;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vminu:
case Intrinsic::arm_neon_vmaxu: {
if (Op.getValueType().isFloatingPoint())
return SDValue();
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
? ISD::UMIN : ISD::UMAX;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vmins:
case Intrinsic::arm_neon_vmaxs: {
// v{min,max}s is overloaded between signed integers and floats.
if (!Op.getValueType().isFloatingPoint()) {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
? ISD::SMIN : ISD::SMAX;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
? ISD::FMINNAN : ISD::FMAXNAN;
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::arm_neon_vtbl1:
return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::arm_neon_vtbl2:
return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
}
}
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc dl(Op);
ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
if (SSID == SyncScope::SingleThread)
return Op;
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 CPUs can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
"Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, dl, MVT::i32));
}
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
ARM_MB::MemBOpt Domain = ARM_MB::ISH;
if (Subtarget->isMClass()) {
// Only a full system barrier exists in the M-class architectures.
Domain = ARM_MB::SY;
} else if (Subtarget->preferISHSTBarriers() &&
Ord == AtomicOrdering::Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
Domain = ARM_MB::ISHST;
}
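// The selected domain becomes the barrier-option operand of the DMB that
// the intrinsic below expands to, e.g. 'dmb ish', 'dmb ishst' or 'dmb sy'.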
return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
DAG.getConstant(Domain, dl, MVT::i32));
}
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// ARM pre-v5TE and Thumb1 do not have preload instructions.
if (!(Subtarget->isThumb2() ||
(!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
// Just preserve the chain.
return Op.getOperand(0);
SDLoc dl(Op);
unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
if (!isRead &&
(!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
// ARMv7 with MP extension has PLDW.
return Op.getOperand(0);
unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
if (Subtarget->isThumb()) {
// Invert the bits.
isRead = ~isRead & 1;
isData = ~isData & 1;
}
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
DAG.getConstant(isData, dl, MVT::i32));
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
CCValAssign &NextVA,
SDValue &Root,
SelectionDAG &DAG,
const SDLoc &dl) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
else
RC = &ARM::GPRRegClass;
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
ArgValue2 = DAG.getLoad(
MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
if (!Subtarget->isLittle())
std::swap (ArgValue, ArgValue2);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
// Return: The frame index the registers were stored into.
int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
int ArgOffset, unsigned ArgSize) const {
// Currently, two use-cases are possible:
// Case #1. Non-var-args function, and we meet the first byval parameter.
// Set up the first unallocated register as the first byval register;
// eat all remaining registers
// (these two actions are performed by the HandleByVal method).
// Then, here, we initialize the stack frame with
// "store-reg" instructions.
// Case #2. Var-args function that doesn't contain byval parameters.
// The same: eat all remaining unallocated registers,
// initialize the stack frame.
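// For example (illustrative): if a byval parameter was assigned r2 and r3
// (RBegin = R2, REnd = R4), ArgOffset below becomes -4 * (R4 - R2) = -8,
// and both registers are stored to the two words immediately below the
// fixed stack arguments.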
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
} else {
unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
REnd = ARM::R4;
}
if (REnd != RBegin)
ArgOffset = -4 * (ARM::R4 - RBegin);
auto PtrVT = getPointerTy(DAG.getDataLayout());
int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
SmallVector<SDValue, 4> MemOps;
const TargetRegisterClass *RC =
AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
unsigned VReg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(OrigArg, 4 * i));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return FrameIndex;
}
// Set up the stack frame that the va_list pointer will start from.
void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain,
unsigned ArgOffset,
unsigned TotalArgRegsSaveSize,
bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Try to store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
// If there are no regs to be stored, just point the address past the last
// argument passed via the stack.
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
CCInfo.getInRegsParamsCount(),
CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
SDValue ARMTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
// Initially ArgRegsSaveSize is zero.
// Then we increase this value each time we meet a byval parameter.
// We also increase this value in the case of a varargs function.
AFI->setArgRegsSaveSize(0);
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
break;
CCValAssign &VA = ArgLocs[i];
unsigned Index = VA.getValNo();
ISD::ArgFlagsTy Flags = Ins[Index].Flags;
if (!Flags.isByVal())
continue;
assert(VA.isMemLoc() && "unexpected byval pointer in reg");
unsigned RBegin, REnd;
CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
ArgRegBegin = std::min(ArgRegBegin, RBegin);
CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
int lastInsIndex = -1;
if (isVarArg && MFI.hasVAStart()) {
unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
if (RegIdx != array_lengthof(GPRArgRegs))
ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
auto PtrVT = getPointerTy(DAG.getDataLayout());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (Ins[VA.getValNo()].isOrigArg()) {
std::advance(CurOrigArg,
Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
}
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
if (VA.needsCustom()) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
if (VA.getLocVT() == MVT::v2f64) {
SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI));
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
}
ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue1,
DAG.getIntPtrConstant(0, dl));
ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
ArgValue, ArgValue2,
DAG.getIntPtrConstant(1, dl));
} else
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
RC = &ARM::SPRRegClass;
else if (RegVT == MVT::f64)
RC = &ARM::DPRRegClass;
else if (RegVT == MVT::v2f64)
RC = &ARM::QPRRegClass;
else if (RegVT == MVT::i32)
RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
: &ARM::GPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
break;
case CCValAssign::SExt:
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
break;
case CCValAssign::ZExt:
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
break;
}
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
// sanity check
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
if (index != lastInsIndex)
{
ISD::ArgFlagsTy Flags = Ins[index].Flags;
// FIXME: For now, all byval parameter objects are marked mutable.
// This can be changed with more analysis.
// In the case of tail call optimization, mark all arguments mutable,
// since they could be overwritten by the lowering of arguments in case
// of a tail call.
if (Flags.isByVal()) {
assert(Ins[index].isOrigArg() &&
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
int FrameIndex = StoreByValRegs(
CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
VA.getLocMemOffset(), Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
unsigned FIOffset = VA.getLocMemOffset();
int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI)));
}
lastInsIndex = index;
}
}
}
// varargs
if (isVarArg && MFI.hasVAStart())
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
return Chain;
}
/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
return CFP->getValueAPF().isPosZero();
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
SDValue WrapperOp = Op.getOperand(1).getOperand(0);
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isPosZero();
}
} else if (Op->getOpcode() == ISD::BITCAST &&
Op->getValueType(0) == MVT::f64) {
// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
// created by LowerConstantFP().
SDValue BitcastOp = Op->getOperand(0);
if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
isNullConstant(BitcastOp->getOperand(0)))
return true;
}
return false;
}
/// Returns the appropriate ARM CMP (cmp) and the corresponding condition
/// code for the given operands.
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG,
const SDLoc &dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
if (!isLegalICmpImmediate(C)) {
// Constant does not fit, try adjusting it by one?
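// For example, with CC == SETLT, 'x < 0x101' cannot encode 0x101 as an
// ARM modified immediate, but the equivalent 'x <= 0x100' can encode
// 0x100, so SETLT is rewritten as SETLE against C - 1 below.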
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C - 1, dl, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C - 1, dl, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C + 1, dl, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C + 1, dl, MVT::i32);
}
break;
}
}
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMISD::NodeType CompareType;
switch (CondCode) {
default:
CompareType = ARMISD::CMP;
break;
case ARMCC::EQ:
case ARMCC::NE:
// Uses only Z Flag
CompareType = ARMISD::CMPZ;
break;
}
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool InvalidOnQNaN) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
else
Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
unsigned Opc = Cmp.getOpcode();
SDLoc DL(Cmp);
if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
Cmp.getOperand(1), Cmp.getOperand(2));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
Cmp.getOperand(1));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
std::pair<SDValue, SDValue>
ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
SDValue &ARMcc) const {
assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
SDValue Value, OverflowCmp;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDLoc dl(Op);
// FIXME: We are currently always generating CMPs because we don't support
// generating CMN through the backend. This is not as good as the natural
// CMP case because it causes a register dependency and cannot be folded
// later.
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::UADDO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
break;
case ISD::SSUBO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
case ISD::USUBO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
break;
} // switch (...)
return std::make_pair(Value, OverflowCmp);
}
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDLoc dl(Op);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
EVT VT = Op.getValueType();
SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
ARMcc, CCR, OverflowCmp);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
SDLoc dl(Op);
unsigned Opc = Cond.getOpcode();
if (Cond.getResNo() == 1 &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
Opc == ISD::USUBO)) {
if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
return SDValue();
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
OverflowCmp, DAG);
}
// Convert:
//
// (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
// (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
//
if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
const ConstantSDNode *CMOVTrue =
dyn_cast<ConstantSDNode>(Cond.getOperand(0));
const ConstantSDNode *CMOVFalse =
dyn_cast<ConstantSDNode>(Cond.getOperand(1));
if (CMOVTrue && CMOVFalse) {
unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
SDValue True;
SDValue False;
if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
True = SelectTrue;
False = SelectFalse;
} else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
True = SelectFalse;
False = SelectTrue;
}
if (True.getNode() && False.getNode()) {
EVT VT = Op.getValueType();
SDValue ARMcc = Cond.getOperand(2);
SDValue CCR = Cond.getOperand(3);
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
assert(True.getValueType() == VT);
return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
}
}
}
// ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
// undefined bits before doing a full-word comparison with zero.
Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
DAG.getConstant(1, dl, Cond.getValueType()));
return DAG.getSelectCC(dl, Cond,
DAG.getConstant(0, dl, Cond.getValueType()),
SelectTrue, SelectFalse, ISD::SETNE);
}
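// Illustrative trace of the constraint logic below (for ISD::SETULT):
// start with GT (SETULT is 'false for equality'), set swpCmpOps because
// the condition contains 'less', then, because SETULT is unordered,
// toggle swpCmpOps back off, set swpVselOps, and flip GT to GE.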
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
bool &swpCmpOps, bool &swpVselOps) {
// Start by selecting the GE condition code for opcodes that return true for
// 'equality'
if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
CC == ISD::SETULE)
CondCode = ARMCC::GE;
// and GT for opcodes that return false for 'equality'.
else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
CC == ISD::SETULT)
CondCode = ARMCC::GT;
// Since we are constrained to GE/GT, if the opcode contains 'less', we need
// to swap the compare operands.
if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
CC == ISD::SETULT)
swpCmpOps = true;
// Both GT and GE are ordered comparisons, and return false for 'unordered'.
// If we have an unordered opcode, we need to swap the operands to the VSEL
// instruction (effectively negating the condition).
//
// This also has the effect of swapping which one of 'less' or 'greater'
// returns true, so we also swap the compare operands. It also switches
// whether we return true for 'equality', so we compensate by picking the
// opposite condition code to our original choice.
if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
CC == ISD::SETUGT) {
swpCmpOps = !swpCmpOps;
swpVselOps = !swpVselOps;
CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
}
// 'ordered' is 'anything but unordered', so use the VS condition code and
// swap the VSEL operands.
if (CC == ISD::SETO) {
CondCode = ARMCC::VS;
swpVselOps = true;
}
// 'unordered or not equal' is 'anything but equal', so use the EQ condition
// code and swap the VSEL operands.
if (CC == ISD::SETUNE) {
CondCode = ARMCC::EQ;
swpVselOps = true;
}
}
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
SDValue TrueLow = TrueVal.getValue(0);
SDValue TrueHigh = TrueVal.getValue(1);
SDValue FalseLow = FalseVal.getValue(0);
SDValue FalseHigh = FalseVal.getValue(1);
SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
ARMcc, CCR, Cmp);
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
ARMcc, CCR, duplicateCmp(Cmp, DAG));
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
} else {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
Cmp);
}
}
static bool isGTorGE(ISD::CondCode CC) {
return CC == ISD::SETGT || CC == ISD::SETGE;
}
static bool isLTorLE(ISD::CondCode CC) {
return CC == ISD::SETLT || CC == ISD::SETLE;
}
// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
// All of these conditions (and their <= and >= counterparts) will do:
// x < k ? k : x
// x > k ? x : k
// k < x ? x : k
// k > x ? k : x
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
const SDValue TrueVal, const SDValue FalseVal,
const ISD::CondCode CC, const SDValue K) {
return (isGTorGE(CC) &&
((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
(isLTorLE(CC) &&
((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
}
// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
const SDValue TrueVal, const SDValue FalseVal,
const ISD::CondCode CC, const SDValue K) {
return (isGTorGE(CC) &&
((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
(isLTorLE(CC) &&
((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
}
// Check if two chained conditionals could be converted into SSAT.
//
// SSAT can replace a set of two conditional selectors that bound a number to an
// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
//
// x < -k ? -k : (x > k ? k : x)
// x < -k ? -k : (x < k ? x : k)
// x > -k ? (x > k ? k : x) : -k
// x < k ? (x < -k ? -k : x) : k
// etc.
//
// It returns true if the conversion can be done, false otherwise.
// Additionally, the variable is returned in parameter V and the constant in K.
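// For example, with k = 127 (so k + 1 = 128 = 2^7), the chained selects
// clamp x to the interval [-128, 127], i.e. an 8-bit signed saturation.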
static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
uint64_t &K) {
SDValue LHS1 = Op.getOperand(0);
SDValue RHS1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
SDValue FalseVal1 = Op.getOperand(3);
ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
if (Op2.getOpcode() != ISD::SELECT_CC)
return false;
SDValue LHS2 = Op2.getOperand(0);
SDValue RHS2 = Op2.getOperand(1);
SDValue TrueVal2 = Op2.getOperand(2);
SDValue FalseVal2 = Op2.getOperand(3);
ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
// Find out which are the constants and which are the variables
// in each conditional
SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
? &RHS1
: nullptr;
SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
? &RHS2
: nullptr;
SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
// We must detect cases where the original operations worked with 16- or
// 8-bit values. In such cases, V2Tmp != V2 because the comparison
// operations must work with sign-extended values but the select operations
// return the original non-extended value.
SDValue V2TmpReg = V2Tmp;
if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
V2TmpReg = V2Tmp->getOperand(0);
// Check that the registers and the constants have the correct values
// in both conditionals
if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
V2TmpReg != V2)
return false;
// Figure out which conditional is saturating the lower/upper bound.
const SDValue *LowerCheckOp =
isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
: isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
? &Op2
: nullptr;
const SDValue *UpperCheckOp =
isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
: isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
? &Op2
: nullptr;
if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
return false;
// Check that the constant in the lower-bound check is the one's
// complement of the constant in the upper-bound check.
int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
int64_t PosVal = std::max(Val1, Val2);
if (((Val1 > Val2 && UpperCheckOp == &Op) ||
(Val1 < Val2 && UpperCheckOp == &Op2)) &&
Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
V = V2;
K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
return true;
}
return false;
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;
uint64_t SatConstant;
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
isSaturatingConditional(Op, SatValue, SatConstant))
return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
// codes: it only has two bits to select the condition code, so it's
// constrained to use only GE, GT, VS and EQ.
//
// To implement all the various ISD::SETXXX opcodes, we sometimes need to
// swap the operands of the previous compare instruction (effectively
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
CC = ISD::getSetCCInverse(CC, true);
std::swap(TrueVal, FalseVal);
}
}
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
bool InvalidOnQNaN;
FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
if (swpCmpOps)
std::swap(LHS, RHS);
if (swpVselOps)
std::swap(TrueVal, FalseVal);
}
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
const ARMSubtarget *Subtarget) {
SDNode *N = Op.getNode();
if (!N->hasOneUse())
// Otherwise it requires moving the value from fp to integer registers.
return false;
if (!N->getNumValues())
return false;
EVT VT = Op.getValueType();
if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
// f32 case is generally profitable. f64 case only makes sense when vcmpe +
// vmrs are very slow, e.g. cortex-a8.
return false;
if (isFloatingPointZero(Op)) {
SeenZero = true;
return true;
}
return ISD::isNormalLoad(N);
}
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
if (isFloatingPointZero(Op))
return DAG.getConstant(0, SDLoc(Op), MVT::i32);
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
llvm_unreachable("Unknown VFP cmp argument!");
}
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
SDValue &RetVal1, SDValue &RetVal2) {
SDLoc dl(Op);
if (isFloatingPointZero(Op)) {
RetVal1 = DAG.getConstant(0, dl, MVT::i32);
RetVal2 = DAG.getConstant(0, dl, MVT::i32);
return;
}
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
SDValue Ptr = Ld->getBasePtr();
RetVal1 =
DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Ld->getAlignment(), Ld->getMemOperand()->getFlags());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4), NewAlign,
Ld->getMemOperand()->getFlags());
return;
}
llvm_unreachable("Unknown VFP cmp argument!");
}
/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
bool LHSSeenZero = false;
bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
bool RHSSeenZero = false;
bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
// If unsafe fp math optimization is enabled and there are no other uses of
// the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
if (CC == ISD::SETOEQ)
CC = ISD::SETEQ;
else if (CC == ISD::SETUNE)
CC = ISD::SETNE;
SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
SDValue ARMcc;
if (LHS.getValueType() == MVT::f32) {
LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(LHS, DAG), Mask);
RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMcc, CCR, Cmp);
}
SDValue LHS1, LHS2;
SDValue RHS1, RHS2;
expandf64Toi32(LHS, DAG, LHS1, LHS2);
expandf64Toi32(RHS, DAG, RHS1, RHS2);
LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
}
return SDValue();
}
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMcc, CCR, Cmp);
}
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
return Result;
}
ARMCC::CondCodes CondCode, CondCode2;
bool InvalidOnQNaN;
FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
return Res;
}
SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Table = Op.getOperand(1);
SDValue Index = Op.getOperand(2);
SDLoc dl(Op);
EVT PTy = getPointerTy(DAG.getDataLayout());
JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
// Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the
// jump table, which does another jump to the destination. This also makes
// it easier to translate it to TBB / TBH later (Thumb2 only).
// FIXME: This might not work if the function is extremely large.
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
Addr, Op.getOperand(2), JTI);
}
if (isPositionIndependent() || Subtarget->isROPI()) {
Addr =
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
Addr =
DAG.getLoad(PTy, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
}
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (Op.getValueType().getVectorElementType() == MVT::i32) {
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
return Op;
return DAG.UnrollVectorOp(Op.getNode());
}
assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4i16)
return DAG.UnrollVectorOp(Op.getNode());
Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
Op.getValueType());
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
if (VT.getVectorElementType() == MVT::f32)
return Op;
return DAG.UnrollVectorOp(Op.getNode());
}
assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4f32)
return DAG.UnrollVectorOp(Op.getNode());
unsigned CastOpc;
unsigned Opc;
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
CastOpc = ISD::SIGN_EXTEND;
Opc = ISD::SINT_TO_FP;
break;
case ISD::UINT_TO_FP:
CastOpc = ISD::ZERO_EXTEND;
Opc = ISD::UINT_TO_FP;
break;
}
Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
return DAG.getNode(Opc, dl, VT, Op);
}
SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
Tmp0.getOpcode() == ARMISD::VMOVDRR;
bool UseNEON = !InGPR && Subtarget->hasNEON();
if (UseNEON) {
// Use VBSL to copy the sign bit.
unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
if (VT == MVT::f64)
Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
DAG.getConstant(32, dl, MVT::i32));
else /*if (VT == MVT::f32)*/
Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
if (SrcVT == MVT::f32) {
Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
if (VT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
} else if (VT == MVT::f32)
Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
dl, MVT::i32);
AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
if (VT == MVT::f32) {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
DAG.getConstant(0, dl, MVT::i32));
} else {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
}
return Res;
}
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
Tmp1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// Or in the signbit with integer operations.
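// For f32 this computes
// result = (bits(Tmp1) & 0x80000000) | (bits(Tmp0) & 0x7fffffff).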
SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
if (VT == MVT::f32) {
Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
}
// f64: Or the high part with signbit and then combine two parts.
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
Tmp0);
SDValue Lo = Tmp0.getValue(0);
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
MachinePointerInfo());
}
// Return LR, which contains the return address. Mark it an implicit live-in.
unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
const ARMBaseRegisterInfo &ARI =
*static_cast<const ARMBaseRegisterInfo*>(RegInfo);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", ARM::SP)
.Default(0);
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
// The result is a 64-bit value, so split it into two 32-bit values and
// return them as a pair of values.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
SDLoc DL(N);
// This function is only supposed to be called for an i64 destination type.
assert(N->getValueType(0) == MVT::i64
&& "ExpandREAD_REGISTER called for non-i64 type result.");
SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
N->getOperand(0),
N->getOperand(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
Read.getValue(1)));
Results.push_back(Read.getOperand(0));
}
/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
/// When \p DstVT, the destination type of \p BC, is on the vector
/// register bank and the source of the bitcast, \p Op, operates on the same
/// bank, it might be possible to combine them, such that everything stays
/// on the vector register bank.
/// \return The node that would replace \p BC, if the combine is possible.
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
SelectionDAG &DAG) {
SDValue Op = BC->getOperand(0);
EVT DstVT = BC->getValueType(0);
// The only vector instruction that can produce a scalar (remember,
// since the bitcast was about to be turned into VMOVDRR, the source
// type is i64) from a vector is EXTRACT_VECTOR_ELT.
// Moreover, we can do this combine only if there is one use.
// Finally, if the destination type is not a vector, there is not
// much point in forcing everything onto the vector bank.
if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!Op.hasOneUse())
return SDValue();
// If the index is not constant, we will introduce an additional
// multiply that will stick.
// Give up in that case.
ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Index)
return SDValue();
unsigned DstNumElt = DstVT.getVectorNumElements();
// Compute the new index.
const APInt &APIntIndex = Index->getAPIntValue();
APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
NewIndex *= APIntIndex;
// Check if the new constant index fits into i32.
if (NewIndex.getBitWidth() > 32)
return SDValue();
// vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
// vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
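// For example (illustrative): v2f32 (bitcast (i64 extractelt v2i64 src, 1))
// becomes v2f32 (extract_subvector (v4f32 bitcast src), i32 2).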
SDLoc dl(Op);
SDValue ExtractSrc = Op.getOperand(0);
EVT VecVT = EVT::getVectorVT(
*DAG.getContext(), DstVT.getScalarType(),
ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
}
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
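/// For example, (i64 (bitcast f64 X)) becomes
/// (i64 (build_pair (VMOVRRD X))), and (f64 (bitcast i64 Y)) becomes
/// (bitcast (VMOVDRR (extract_element Y, 0), (extract_element Y, 1))).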
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
SDValue Op = N->getOperand(0);
// This function is only supposed to be called for i64 types, either as the
// source or destination of the bit convert.
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
"ExpandBITCAST called for non-i64 type");
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
// Do not force values to GPRs (this is what VMOVDRR does for the inputs)
// if we can combine the bitcast with its source.
if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
return Val;
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, DstVT,
DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
SDValue Cvt;
if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
SrcVT.getVectorNumElements() > 1)
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32),
DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
else
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Op);
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
return SDValue();
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction. However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(VT.isVector() && "Expected a vector type");
// The canonical modified immediate encoding of a zero vector is....0!
SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
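/// For example, for an i64 right shift split into 32-bit parts: when
/// ShAmt < 32, Lo = (Lo >> ShAmt) | (Hi << (32 - ShAmt)) and Hi = Hi >> ShAmt;
/// when ShAmt >= 32, Lo = Hi >> (ShAmt - 32) and Hi becomes the sign bits
/// (SRA) or zero (SRL), with the two cases selected via CMOV on
/// ShAmt - 32 >= 0.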
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
ARMcc, CCR, CmpLo);
SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue HiBigShift = Opc == ISD::SRA
? DAG.getNode(Opc, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, dl, VT))
: DAG.getConstant(0, dl, VT);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
ARMcc, CCR, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
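/// For example, for an i64 left shift split into 32-bit parts: when
/// ShAmt < 32, Hi = (Hi << ShAmt) | (Lo >> (32 - ShAmt)) and Lo = Lo << ShAmt;
/// when ShAmt >= 32, Hi = Lo << (ShAmt - 32) and Lo = 0.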
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
ARMcc, CCR, CmpHi);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPSCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
// The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3),
// so that the shift and the AND get folded into a bitfield extract.
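// For example, if FPSCR[23:22] = 0b11 (round toward zero), adding 1 << 22
// increments the two-bit field to 0b00 (the carry into bit 24 is masked off
// by the final AND), giving FLT_ROUNDS = 0, i.e. toward zero.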
SDLoc dl(Op);
SDValue Ops[] = { DAG.getEntryNode(),
DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
}
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
if (VT.isVector()) {
assert(ST->hasNEON());
// Compute the least significant set bit: LSB = X & -X
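// e.g. for x = 0b0110: -x = ...11111010, so x & -x = 0b0010, and
// lsb - 1 = 0b0001 has exactly cttz(x) = 1 bits set.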
SDValue X = N->getOperand(0);
SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
EVT ElemTy = VT.getVectorElementType();
if (ElemTy == MVT::i8) {
// Compute with: cttz(x) = ctpop(lsb - 1)
SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(1, dl, ElemTy));
SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
}
if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
(N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
// Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
unsigned NumBits = ElemTy.getSizeInBits();
SDValue WidthMinus1 =
DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
}
// Compute with: cttz(x) = ctpop(lsb - 1)
// Since we can only compute the number of bits in a byte with vcnt.8, we
// have to gather the result with pairwise addition (vpaddl) for i16, i32,
// and i64.
// Compute LSB - 1.
SDValue Bits;
if (ElemTy == MVT::i64) {
// Load constant 0xffff'ffff'ffff'ffff to register.
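// 0x1eff is the encoded NEON modified immediate with Op=1, Cmode=1110 and
// Imm=0xff, i.e. every byte set (see the 64-bit case of isNEONModifiedImm
// below); adding all-ones is equivalent to subtracting one.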
SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(0x1eff, dl, MVT::i32));
Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
} else {
SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
DAG.getTargetConstant(1, dl, ElemTy));
Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
}
// Count #bits with vcnt.8.
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
// Gather the #bits with vpaddl (pairwise add.)
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
Cnt8);
if (ElemTy == MVT::i16)
return Cnt16;
EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
Cnt16);
if (ElemTy == MVT::i32)
return Cnt32;
assert(ElemTy == MVT::i64);
SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
Cnt32);
return Cnt64;
}
if (!ST->hasV6T2Ops())
return SDValue();
SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
/// for each 16-bit element from operand, repeated. The basic idea is to
/// leverage vcnt to get the 8-bit counts, gather and add the results.
///
/// Trace for v4i16:
/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
/// [b0 b1 b2 b3 b4 b5 b6 b7]
/// +[b1 b0 b3 b2 b5 b4 b7 b6]
/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
}
/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
/// bit-count for each 16-bit element from the operand. We need slightly
/// different sequencing for v4i16 and v8i16 to stay within NEON's available
/// 64/128-bit registers.
///
/// Trace for v4i16:
/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
/// v4i16:Extracted = [k0 k1 k2 k3 ]
static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
if (VT.is64BitVector()) {
SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
DAG.getIntPtrConstant(0, DL));
} else {
SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
BitCounts, DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
}
}
/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
/// bit-count for each 32-bit element from the operand. The idea here is
/// to split the vector into 16-bit elements, leverage the 16-bit count
/// routine, and then combine the results.
///
/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
/// input = [v0 v1 ] (vi: 32-bit elements)
/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
/// vrev: N0 = [k1 k0 k3 k2 ]
/// [k0 k1 k2 k3 ]
/// N1 =+[k1 k0 k3 k2 ]
/// [k0 k2 k1 k3 ]
/// N2 =+[k1 k3 k0 k2 ]
/// [k0 k2 k1 k3 ]
/// Extended =+[k1 k3 k0 k2 ]
/// [k0 k2 ]
/// Extracted=+[k1 k3 ]
///
static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
if (VT.is64BitVector()) {
SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
DAG.getIntPtrConstant(0, DL));
} else {
SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
}
}
static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
if (VT.getVectorElementType() == MVT::i32)
return lowerCTPOP32BitElements(N, DAG);
else
return lowerCTPOP16BitElements(N, DAG);
}
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (!VT.isVector())
return SDValue();
// Lower vector shifts on NEON to use VSHL.
assert(ST->hasNEON() && "unexpected vector shift");
// Left shifts translate directly to the vshiftu intrinsic.
if (N->getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
MVT::i32),
N->getOperand(0), N->getOperand(1));
assert((N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
// NEON uses the same intrinsics for both left and right shifts. For
// right shifts, the shift amounts are negative, so negate the vector of
// shift amounts.
EVT ShiftVT = N->getOperand(1).getValueType();
SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
getZeroVector(ShiftVT, DAG, dl),
N->getOperand(1));
Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
Intrinsic::arm_neon_vshifts :
Intrinsic::arm_neon_vshiftu);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(vshiftInt, dl, MVT::i32),
N->getOperand(0), NegatedCount);
}
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
// We can get here for a node like i32 = ISD::SHL i32, i64
if (VT != MVT::i64)
return SDValue();
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
if (!isOneConstant(N->getOperand(1)))
return SDValue();
// If we are in thumb mode, we don't have RRX.
if (ST->isThumb1Only()) return SDValue();
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
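// e.g. for an i64 logical shift right by one: the high word is shifted right
// with the bit shifted out captured in the carry flag, and RRX then rotates
// that carry into bit 31 of the low word: Lo' = (carry << 31) | (Lo >> 1).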
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
DAG.getConstant(1, dl, MVT::i32));
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue TmpOp0, TmpOp1;
bool Invert = false;
bool Swap = false;
unsigned Opc = 0;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
(SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
// Special-case integer 64-bit equality comparisons. They aren't legal,
// but they can be lowered with a few vector instructions.
unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
DAG.getCondCode(ISD::SETEQ));
SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
if (SetCCOpcode == ISD::SETNE)
Merged = DAG.getNOT(dl, Merged, CmpVT);
Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
return Merged;
}
if (CmpVT.getVectorElementType() == MVT::i64)
// 64-bit comparisons are not legal in general.
return SDValue();
if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETOLT:
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT: Opc = ARMISD::VCGT; break;
case ISD::SETOLE:
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGE:
case ISD::SETGE: Opc = ARMISD::VCGE; break;
case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETONE:
// Expand this to (OLT | OGT).
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
// Expand this to (OLT | OGE).
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
break;
}
} else {
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break;
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGE: Opc = ARMISD::VCGE; break;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
}
// Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
if (Opc == ARMISD::VCEQ) {
SDValue AndOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
AndOp = Op0;
else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
AndOp = Op1;
// Ignore bitconvert.
if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
AndOp = AndOp.getOperand(0);
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
Invert = !Invert;
}
}
}
if (Swap)
std::swap(Op0, Op1);
// If one of the operands is a constant vector zero, attempt to fold the
// comparison to a specialized compare-against-zero form.
SDValue SingleOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
SingleOp = Op0;
else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
if (Opc == ARMISD::VCGE)
Opc = ARMISD::VCLEZ;
else if (Opc == ARMISD::VCGT)
Opc = ARMISD::VCLTZ;
SingleOp = Op1;
}
SDValue Result;
if (SingleOp.getNode()) {
switch (Opc) {
case ARMISD::VCEQ:
Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLEZ:
Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGT:
Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLTZ:
Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
default:
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
} else {
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
Result = DAG.getSExtOrTrunc(Result, dl, VT);
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
return Result;
}
static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
assert(Carry.getOpcode() != ISD::CARRY_FALSE);
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
SDValue ARMcc = DAG.getConstant(
IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
Cmp.getValue(1), SDValue());
return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
CCR, Chain.getValue(1));
}
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
const SDLoc &dl, EVT &VT, bool is128Bits,
NEONModImmType type) {
unsigned OpCmode, Imm;
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
// immediate instructions other than VMOV do not support the 8-bit encoding
// of a zero vector, and the default encoding of zero is supposed to be the
// 32-bit version.
if (SplatBits == 0)
SplatBitSize = 32;
switch (SplatBitSize) {
case 8:
if (type != VMOVModImm)
return SDValue();
// Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
OpCmode = 0xe;
Imm = SplatBits;
VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x00nn: Op=x, Cmode=100x.
OpCmode = 0x8;
Imm = SplatBits;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0xnn00: Op=x, Cmode=101x.
OpCmode = 0xa;
Imm = SplatBits >> 8;
break;
}
return SDValue();
case 32:
// NEON's 32-bit VMOV supports splat values where:
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x000000nn: Op=x, Cmode=000x.
OpCmode = 0;
Imm = SplatBits;
break;
}
if ((SplatBits & ~0xff00) == 0) {
// Value = 0x0000nn00: Op=x, Cmode=001x.
OpCmode = 0x2;
Imm = SplatBits >> 8;
break;
}
if ((SplatBits & ~0xff0000) == 0) {
// Value = 0x00nn0000: Op=x, Cmode=010x.
OpCmode = 0x4;
Imm = SplatBits >> 16;
break;
}
if ((SplatBits & ~0xff000000) == 0) {
// Value = 0xnn000000: Op=x, Cmode=011x.
OpCmode = 0x6;
Imm = SplatBits >> 24;
break;
}
// cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
if (type == OtherModImm) return SDValue();
if ((SplatBits & ~0xffff) == 0 &&
((SplatBits | SplatUndef) & 0xff) == 0xff) {
// Value = 0x0000nnff: Op=x, Cmode=1100.
OpCmode = 0xc;
Imm = SplatBits >> 8;
break;
}
if ((SplatBits & ~0xffffff) == 0 &&
((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
// Value = 0x00nnffff: Op=x, Cmode=1101.
OpCmode = 0xd;
Imm = SplatBits >> 16;
break;
}
// Note: there are a few 32-bit splat values (specifically: 00ffff00,
// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
// VMOV.I32. A (very) minor optimization would be to replicate the value
// and fall through here to test for a valid 64-bit splat. But, then the
// caller would also need to check and handle the change in size.
return SDValue();
case 64: {
if (type != VMOVModImm)
return SDValue();
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
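// e.g. the splat 0x00ff00ff00ff00ff has bytes ff,00,ff,00,ff,00,ff,00
// starting from the least significant byte, giving Imm = 0b01010101 = 0x55.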
uint64_t BitMask = 0xff;
uint64_t Val = 0;
unsigned ImmMask = 1;
Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
Imm |= ImmMask;
} else if ((SplatBits & BitMask) != 0) {
return SDValue();
}
BitMask <<= 8;
ImmMask <<= 1;
}
if (DAG.getDataLayout().isBigEndian())
// Swap the higher and lower 32-bit words.
Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
default:
llvm_unreachable("unexpected size for isNEONModifiedImm");
}
unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
}
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
const APFloat &FPVal = CFP->getValueAPF();
// Prevent floating-point constants from using literal loads
// when execute-only is enabled.
if (ST->genExecuteOnly()) {
APInt INTVal = FPVal.bitcastToAPInt();
SDLoc DL(CFP);
if (IsDouble) {
SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
if (!ST->isLittle())
std::swap(Lo, Hi);
return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
} else {
return DAG.getConstant(INTVal, DL, MVT::i32);
}
}
if (!ST->hasVFP3())
return SDValue();
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU.
if (IsDouble && Subtarget->isFPOnlySP())
return SDValue();
// Try splatting with a VMOV.f32...
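// (getFP32Imm/getFP64Imm return the 8-bit VFP immediate encoding, or -1 if
// the value is not representable; roughly, values of the form
// +/-(16..31)/16 * 2^n with n in [-3, 4], e.g. 0.5, 1.0 or 31.0.)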
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
// We have code in place to select a valid ConstantFP already, no need to
// do any mangling.
return Op;
}
// It's a float and we are trying to use NEON operations where
// possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
NewVal);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
DAG.getConstant(0, DL, MVT::i32));
}
// The rest of our options are NEON only; make sure that's allowed before
// proceeding.
if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
return SDValue();
EVT VMovVT;
uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
// It wouldn't really be worth bothering for doubles except for one very
// important value, which does happen to match: 0.0. So make sure we don't do
// anything stupid.
if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
return SDValue();
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
VMovVT, false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
if (IsDouble)
return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
// It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
DAG.getConstant(0, DL, MVT::i32));
}
// Finally, try a VMVN.i32
NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
if (IsDouble)
return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
// It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
DAG.getConstant(0, DL, MVT::i32));
}
return SDValue();
}
// Check if a VEXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
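// e.g. for v8i8 the mask <2, 3, 4, 5, 6, 7, 0, 1> is accepted with Imm = 2:
// the indices step by one and wrap back around into the same source.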
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
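// e.g. for two v4i32 sources the mask <6, 7, 0, 1> wraps past NumElts * 2,
// so ReverseVEXT is set and Imm is adjusted from 6 to 6 - 4 = 2.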
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, it may still be
// a VEXT but the source vectors must be swapped.
ExpectedElt += 1;
if (ExpectedElt == NumElts * 2) {
ExpectedElt = 0;
ReverseVEXT = true;
}
if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
// Adjust the index value if the source operands will be swapped.
if (ReverseVEXT)
Imm -= NumElts;
return true;
}
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
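/// e.g. for v8i16 with BlockSize == 32, the mask <1, 0, 3, 2, 5, 4, 7, 6>
/// swaps adjacent 16-bit elements and therefore matches VREV32.16.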
static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
return true;
}
static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
// range, then 0 is placed into the resulting vector. So pretty much any mask
// of 8 elements can work here.
return VT == MVT::v8i8 && M.size() == 8;
}
// Checks whether the shuffle mask represents a vector transpose (VTRN) by
// checking that pairs of elements in the shuffle mask represent the same index
// in each vector, incrementing the expected index by 2 at each step.
// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
// v2={e,f,g,h}
// WhichResult gives the offset for each element in the mask based on which
// of the two results it belongs to.
//
// The transpose can be represented either as:
// result1 = shufflevector v1, v2, result1_shuffle_mask
// result2 = shufflevector v1, v2, result2_shuffle_mask
// where v1/v2 and the shuffle masks have the same number of elements
// (here WhichResult (see below) indicates which result is being checked)
//
// or as:
// results = shufflevector v1, v2, shuffle_mask
// where both results are returned in one vector and the shuffle mask has twice
// as many elements as v1/v2 (here WhichResult will always be 0 if true); in
// this case we check the low half and the high half of the shuffle mask as if
// each were a mask of the first form.
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
// If the mask is twice as long as the input vector then we need to check the
// upper and lower parts of the mask with a matching value for WhichResult
// FIXME: A mask with only even values will be rejected in case the first
// element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
// M[0] is used to determine WhichResult
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
return false;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
return true;
}
/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
if (M.size() == NumElts * 2)
WhichResult = i / NumElts;
else
WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
return false;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
return true;
}
// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
// that the mask elements are either all even and in steps of size 2 or all odd
// and in steps of size 2.
// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
// v2={e,f,g,h}
// Requires checks similar to those of isVTRNMask with
// respect to how the results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
return false;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
for (unsigned k = 0; k < Half; ++k) {
int MIdx = M[i + j + k];
if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
// that pairs of elements of the shufflemask represent the same index in each
// vector incrementing sequentially through the vectors.
// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
// v2={e,f,g,h}
// Requires checks similar to those of isVTRNMask with respect to how the
// results are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
return false;
Idx += 1;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
if (M.size() != NumElts && M.size() != NumElts*2)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- WhichResult = M[i] == 0 ? 0 : 1;
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
return false;
Idx += 1;
}
}
if (M.size() == NumElts*2)
WhichResult = 0;
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
return true;
}
/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
unsigned &WhichResult,
bool &isV_UNDEF) {
isV_UNDEF = false;
if (isVTRNMask(ShuffleMask, VT, WhichResult))
return ARMISD::VTRN;
if (isVUZPMask(ShuffleMask, VT, WhichResult))
return ARMISD::VUZP;
if (isVZIPMask(ShuffleMask, VT, WhichResult))
return ARMISD::VZIP;
isV_UNDEF = true;
if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
return ARMISD::VTRN;
if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
return ARMISD::VUZP;
if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
return ARMISD::VZIP;
return 0;
}
/// \return true if this is a reverse operation on a vector.
static bool isReverseMask(ArrayRef<int> M, EVT VT) {
unsigned NumElts = VT.getVectorNumElements();
// Make sure the mask has the right size.
if (NumElts != M.size())
return false;
// Look for <15, ..., 3, -1, 1, 0>.
for (unsigned i = 0; i != NumElts; ++i)
if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
return false;
return true;
}
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
const ARMSubtarget *ST, const SDLoc &dl) {
uint64_t Val;
if (!isa<ConstantSDNode>(N))
return SDValue();
Val = cast<ConstantSDNode>(N)->getZExtValue();
if (ST->isThumb1Only()) {
if (Val <= 255 || ~Val <= 255)
return DAG.getConstant(Val, dl, MVT::i32);
} else {
if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
return DAG.getConstant(Val, dl, MVT::i32);
}
return SDValue();
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc dl(Op);
EVT VT = Op.getValueType();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatUndef.isAllOnesValue())
return DAG.getUNDEF(VT);
if (SplatBitSize <= 64) {
// Check if an immediate VMOV works.
EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isNEONModifiedImm(NegatedImm,
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
}
}
}
}
// Scan through the operands to see if only one value is used.
//
// As an optimisation, even if more than one value is used it may be more
// profitable to splat with one value and then change some lanes.
//
// Heuristically we decide to do this if the vector has a "dominant" value,
// defined as splatted to more than half of the lanes.
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool hasDominantValue = false;
bool isConstant = true;
// Map of the number of times a particular SDValue appears in the
// element list.
DenseMap<SDValue, unsigned> ValueCounts;
SDValue Value;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
if (i > 0)
isOnlyLowElement = false;
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
// Is this value dominant? (takes up more than half of the lanes)
if (++Count > (NumElts / 2)) {
hasDominantValue = true;
Value = V;
}
}
if (ValueCounts.size() != 1)
usesOnlyOneValue = false;
if (!Value.getNode() && !ValueCounts.empty())
Value = ValueCounts.begin()->first;
if (ValueCounts.empty())
return DAG.getUNDEF(VT);
// Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
// Keep going if we are hitting this case.
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
unsigned EltSize = VT.getScalarSizeInBits();
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (hasDominantValue && EltSize <= 32) {
if (!isConstant) {
SDValue N;
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
// just use VDUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the VDUP from lane instructions only have
// constant-index forms.
ConstantSDNode *constIndex;
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
(constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
Value, DAG.getConstant(index, dl, MVT::i32)),
DAG.getConstant(index, dl, MVT::i32));
} else
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
} else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
if (!usesOnlyOneValue) {
// The dominant value was splatted as 'N', but we now have to insert
// all differing elements.
for (unsigned I = 0; I < NumElts; ++I) {
if (Op.getOperand(I) == Value)
continue;
SmallVector<SDValue, 3> Ops;
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
}
}
return N;
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
if (usesOnlyOneValue) {
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
if (isConstant && Val.getNode())
return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
}
// If all elements are constants and the case above didn't get hit, fall back
// to the default expansion, which will generate a load from the constant
// pool.
if (isConstant)
return SDValue();
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
SDValue shuffle = ReconstructShuffle(Op, DAG);
if (shuffle != SDValue())
return shuffle;
}
if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
// If we haven't found an efficient lowering, try splitting a 128-bit vector
// into two 64-bit vectors; we might discover a better way to lower it.
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
EVT ExtVT = VT.getVectorElementType();
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
SDValue Lower =
DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
if (Lower.getOpcode() == ISD::BUILD_VECTOR)
Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
SDValue Upper = DAG.getBuildVector(
HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
if (Upper.getOpcode() == ISD::BUILD_VECTOR)
Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
if (Lower && Upper)
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
}
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
for (unsigned i = 0 ; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
return SDValue();
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt = std::numeric_limits<unsigned>::max();
unsigned MaxElt = 0;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element
// "WindowBase + i * WindowScale" in ShuffleVec.
int WindowBase = 0;
int WindowScale = 1;
ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
} else if (!isa<ConstantSDNode>(V.getOperand(1))) {
// Furthermore, shuffles require a constant mask, whereas extractelts
// accept variable indices.
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = llvm::find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
// are involved.
if (Sources.size() > 2)
return SDValue();
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy))
SmallestEltTy = SrcEltTy;
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
if (SrcVT.getSizeInBits() == VT.getSizeInBits())
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
return SDValue();
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
return SDValue();
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i32));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i32));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i32));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i32));
Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Src.MinElt, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final sanity check before we try to actually produce a shuffle.
DEBUG(
for (auto Src : Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT);
);
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = llvm::find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT))
return SDValue();
// We can't handle more than two sources. This should have already
// been checked before this point.
assert(Sources.size() <= 2 && "Too many sources!");
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (M[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = M[i];
}
// Compute the index in the perfect shuffle table.
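// Each of the four indices is a base-9 digit (0-7 for a lane, 8 for undef),
// so together they form a single four-digit base-9 table index.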
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return true;
}
bool ReverseVEXT, isV_UNDEF;
unsigned Imm, WhichResult;
unsigned EltSize = VT.getScalarSizeInBits();
return (EltSize >= 32 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
isVTBLMask(M, VT) ||
isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
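// PFEntry layout: bits [31:30] hold the cost, [29:26] the opcode (see the
// enum below), [25:13] the LHS sub-shuffle id and [12:0] the RHS one.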
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR // VTRN, right result
};
if (OpNum == OP_COPY) {
if (LHSID == (1*9+2)*9+3) return LHS;
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default: llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
// vrev <4 x i16> -> VREV32
if (VT.getVectorElementType() == MVT::i16)
return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
// vrev <4 x i8> -> VREV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3:
return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3:
return DAG.getNode(ARMISD::VEXT, dl, VT,
OpLHS, OpRHS,
DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
case OP_VUZPL:
case OP_VUZPR:
return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
case OP_VZIPL:
case OP_VZIPR:
return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
case OP_VTRNL:
case OP_VTRNR:
return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
}
}
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
SmallVector<SDValue, 8> VTBLMask;
for (ArrayRef<int>::iterator
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue OpLHS = Op.getOperand(0);
EVT VT = OpLHS.getValueType();
assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
"Expect an v8i16/v16i8 type");
OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
// For a v16i8 type: after the VREV64, we have <7, ..., 0, 15, ..., 8>. The
// VEXT below swaps the two double words, yielding the fully reversed vector
// <15, ..., 0>. The v8i16 case is similar.
unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
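// The VEXT immediate counts elements, so rotating by one double word is
// 8 lanes for v16i8 and 4 lanes for v8i16.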
return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
DAG.getConstant(ExtractNum, DL, MVT::i32));
}
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
// FIXME: floating-point vectors should be canonicalized to integer vectors
// of the same size so that they get CSEd properly.
ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize <= 32) {
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via a plain VDUP, if possible.
if (Lane == -1) Lane = 0;
// Test if V1 is a SCALAR_TO_VECTOR.
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
// (and probably will turn into a SCALAR_TO_VECTOR once legalization
// reaches it).
if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(0))) {
bool IsScalarToVector = true;
for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
if (!V1.getOperand(i).isUndef()) {
IsScalarToVector = false;
break;
}
if (IsScalarToVector)
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, dl, MVT::i32));
}
bool ReverseVEXT;
unsigned Imm;
if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
if (ReverseVEXT)
std::swap(V1, V2);
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
}
if (isVREVMask(ShuffleMask, VT, 64))
return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
if (isVREVMask(ShuffleMask, VT, 32))
return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
// Check for Neon shuffles that modify both input vectors in place.
// If both results are used, i.e., if there are two shuffles with the same
// source operands and with masks corresponding to both results of one of
// these operations, DAG memoization will ensure that a single node is
// used for both shuffles.
unsigned WhichResult;
bool isV_UNDEF;
if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
ShuffleMask, VT, WhichResult, isV_UNDEF)) {
if (isV_UNDEF)
V2 = V1;
return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
.getValue(WhichResult);
}
// Also check for these shuffles through CONCAT_VECTORS: we canonicalize
// shuffles that produce a result larger than their operands with:
// shuffle(concat(v1, undef), concat(v2, undef))
// ->
// shuffle(concat(v1, v2), undef)
// because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
//
// This is useful in the general case, but there are special cases where
// native shuffles produce larger results: the two-result ops.
//
// Look through the concat when lowering them:
// shuffle(concat(v1, v2), undef)
// ->
// concat(VZIP(v1, v2):0, :1)
//
if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
SDValue SubV1 = V1->getOperand(0);
SDValue SubV2 = V1->getOperand(1);
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
assert(llvm::all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
if (isV_UNDEF)
SubV2 = SubV1;
assert((WhichResult == 0) &&
"In-place shuffle of concat can only have one result!");
SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
SubV1, SubV2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
Res.getValue(1));
}
}
}
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
// Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
Ops.push_back(DAG.getUNDEF(EltVT));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
ShuffleMask[i] < (int)NumElts ? V1 : V2,
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
dl, MVT::i32)));
}
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
if (VT == MVT::v8i8)
if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
return NewOp;
return SDValue();
}
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// INSERT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(2);
if (!isa<ConstantSDNode>(Lane))
return SDValue();
return Op;
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
if (!isa<ConstantSDNode>(Lane))
return SDValue();
SDValue Vec = Op.getOperand(0);
if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
return Op;
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
"unexpected CONCAT_VECTORS");
SDLoc dl(Op);
SDValue Val = DAG.getUNDEF(MVT::v2f64);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (!Op0.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
DAG.getIntPtrConstant(0, dl));
if (!Op1.isUndef())
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
DAG.getIntPtrConstant(1, dl));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
}
/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
/// element has been zero/sign-extended, depending on the isSigned parameter,
/// from an integer type half its size.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
// A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
EVT VT = N->getValueType(0);
if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
SDNode *BVN = N->getOperand(0).getNode();
if (BVN->getValueType(0) != MVT::v4i32 ||
BVN->getOpcode() != ISD::BUILD_VECTOR)
return false;
unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
unsigned HiElt = 1 - LoElt;
ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
return false;
if (isSigned) {
if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
return true;
} else {
if (Hi0->isNullValue() && Hi1->isNullValue())
return true;
}
return false;
}
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *Elt = N->getOperand(i).getNode();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
/// isSignExtended - Check if a node is a vector value that is sign-extended
/// or a constant BUILD_VECTOR with sign-extended elements.
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
return true;
if (isExtendedBUILD_VECTOR(N, DAG, true))
return true;
return false;
}
/// isZeroExtended - Check if a node is a vector value that is zero-extended
/// or a constant BUILD_VECTOR with zero-extended elements.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
return true;
if (isExtendedBUILD_VECTOR(N, DAG, false))
return true;
return false;
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect the ExtTy to be 128 bits total. If the OrigTy is less than
// 64 bits, we need to insert a new extension so that it will be 64 bits.
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
/// SkipLoadExtensionForVMULL - return a load of the original vector size that
/// does not do any sign/zero extension. If the original vector is less
/// than 64 bits, an appropriate extension will be added after the load to
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
// The load already has the right type.
if (ExtendedTy == LD->getMemoryVT())
return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(),
LD->getAlignment(), LD->getMemOperand()->getFlags());
// We need to create a zextload/sextload. We cannot just create a load
// followed by a sext/zext node because LowerMUL is also run during normal
// operation legalization, where we can't create illegal types.
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
LD->getMemoryVT(), LD->getAlignment(),
LD->getMemOperand()->getFlags());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
/// extending load, or BUILD_VECTOR with extended elements, return the
/// unextended value. The unextended vector should be 64 bits so that it can
/// be used as an operand to a VMULL instruction. If the original vector size
/// before extension is less than 64 bits, we add an extension to resize
/// the vector to 64 bits.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
"Expected extending load");
SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
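// Splice the new load into the DAG: route the old load's chain users to the
// new load, and give its value users a re-extended copy so their type
// expectations still hold.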
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue extLoad =
DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
return newLoad;
}
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
SDNode *BVN = N->getOperand(0).getNode();
assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
return DAG.getBuildVector(
MVT::v2i32, SDLoc(N),
{BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
}
// Construct a new BUILD_VECTOR with elements truncated to half the size.
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
SDLoc dl(N);
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
EVT VT = Op.getValueType();
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = ARMISD::VMULLs;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = ARMISD::VMULLu;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = ARMISD::VMULLs;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = ARMISD::VMULLu;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = ARMISD::VMULLu;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a VMULL instruction.
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
Op0 = SkipExtensionForVMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
// isel lowering, to take advantage of no-stall back-to-back vmul + vmla.
// vmull q0, d4, d6
// vmlal q0, d5, d6
// is faster than
// vaddl q0, d4, d5
// vmovl q1, d6
// vmul q0, q0, q1
SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
// Get reciprocal estimate.
// float4 recip = vrecpeq_f32(yf);
Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
Y);
// Because char has a smaller range than uchar, we can actually get away
// without any Newton steps. This requires that we use a weird bias
// of 0xb000, however (again, this has been exhaustively tested).
// float4 result = as_float4(as_int4(xf*recip) + 0xb000);
X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
// Convert back to short.
X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
return X;
}
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
// float4 xf = vcvt_f32_s32(vmovl_s16(x));
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
// Use reciprocal estimate and one refinement step.
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
N1);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
N1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// Because short has a smaller range than ushort, we can actually get away
// with only a single Newton step. This requires that we use a weird bias
// of 0x89, however (again, this has been exhaustively tested).
// float4 result = as_float4(as_int4(xf*recip) + 0x89);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
// return vmovn_s32(vcvt_s32_f32(result));
N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
return N0;
}
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::SDIV");
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
if (VT == MVT::v8i8) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(4, dl));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(4, dl));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(0, dl));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(0, dl));
N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
N0 = LowerCONCAT_VECTORS(N0, DAG);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
return N0;
}
return LowerSDIV_v4i16(N0, N1, dl, DAG);
}
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
if (VT == MVT::v8i8) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(4, dl));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(4, dl));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(0, dl));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(0, dl));
N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
N0 = LowerCONCAT_VECTORS(N0, DAG);
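// Narrow v8i16 -> v8i8 with a saturating signed-to-unsigned move (VQMOVUN);
// the unsigned quotients fit in a byte, so no information is lost.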
N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
MVT::i32),
N0);
return N0;
}
// v4i16 udiv ... Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_u16(y));
// float4 xf = vcvt_f32_s32(vmovl_u16(x));
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
// Use reciprocal estimate and two refinement steps.
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
// recip *= vrecpsq_f32(yf, recip);
N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
BN1);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
BN1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
BN1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
// Simply multiplying by the reciprocal estimate can leave us a few ulps
// too low, so we add 2 ulps (exhaustive testing shows that this is enough,
// and that it will never cause us to return an answer too large).
// float4 result = as_float4(as_int4(xf*recip) + 2);
N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
N1 = DAG.getConstant(2, dl, MVT::v4i32);
N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
// Convert back to integer and return.
// return vmovn_u32(vcvt_s32_f32(result));
N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
return N0;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getNode()->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = ARMISD::ADDC; break;
case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
case ISD::SUBC: Opc = ARMISD::SUBC; break;
case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
}
if (!ExtraOp)
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1));
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
Op.getOperand(1), Op.getOperand(2));
}
SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin());
// For iOS, we want to call an alternative entry point: __sincos_stret;
// the return values are passed via sret.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
Type *RetTy = StructType::get(ArgTy, ArgTy);
auto &DL = DAG.getDataLayout();
ArgListTy Args;
bool ShouldUseSRet = Subtarget->isAPCS_ABI();
SDValue SRet;
if (ShouldUseSRet) {
// Create stack object for sret.
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
ArgListEntry Entry;
Entry.Node = SRet;
Entry.Ty = RetTy->getPointerTo();
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsSRet = true;
Args.push_back(Entry);
RetTy = Type::getVoidTy(*DAG.getContext());
}
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
(ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
RTLIB::Libcall LC =
(ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
CallingConv::ID CC = getLibcallCallingConv(LC);
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setCallee(CC, RetTy, Callee, std::move(Args))
.setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
if (!ShouldUseSRet)
return CallResult.first;
SDValue LoadSin =
DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
// Address of cos field.
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
SDValue LoadCos =
DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
LoadSin.getValue(0), LoadCos.getValue(0));
}
SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
bool Signed,
SDValue &Chain) const {
EVT VT = Op.getValueType();
assert((VT == MVT::i32 || VT == MVT::i64) &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
const auto &DL = DAG.getDataLayout();
const auto &TLI = DAG.getTargetLoweringInfo();
const char *Name = nullptr;
if (Signed)
Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
else
Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
ARMTargetLowering::ArgListTy Args;
for (auto AI : {1, 0}) {
ArgListEntry Arg;
Arg.Node = Op.getOperand(AI);
Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
Args.push_back(Arg);
}
CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
ES, std::move(Args));
return LowerCallTo(CLI).first;
}
SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
assert(Op.getValueType() == MVT::i32 &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
DAG.getEntryNode(), Op.getOperand(1));
return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
}
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
SDLoc DL(N);
SDValue Op = N->getOperand(1);
if (N->getValueType(0) == MVT::i32)
return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
DAG.getConstant(1, DL, MVT::i32));
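// For an i64 denominator, OR the two halves: the result is zero iff the
// full 64-bit value is zero, so a single 32-bit check suffices.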
return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
}
void ARMTargetLowering::ExpandDIV_Windows(
SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const {
const auto &DL = DAG.getDataLayout();
const auto &TLI = DAG.getTargetLoweringInfo();
assert(Op.getValueType() == MVT::i64 &&
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
Results.push_back(Lower);
Results.push_back(Upper);
}
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
// Acquire/Release load/store is not legal for targets without a dmb or
// equivalent available.
return SDValue();
// Monotonic load/store is legal for all targets.
return Op;
}
static void ReplaceREADCYCLECOUNTER(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc DL(N);
// Under Power Management extensions, the cycle-count is:
// mrc p15, #0, <Rt>, c9, c13, #0
SDValue Ops[] = { N->getOperand(0), // Chain
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getConstant(15, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(9, DL, MVT::i32),
DAG.getConstant(13, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32)
};
SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
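// The MRC reads only a 32-bit cycle count; pair it with zero to produce the
// i64 result expected by READCYCLECOUNTER.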
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
DAG.getConstant(0, DL, MVT::i32)));
Results.push_back(Cycles32.getValue(1));
}
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
dl, MVT::i32);
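// VLo/VHi hold the low and high 32-bit halves of the 64-bit value; the
// REG_SEQUENCE below glues them into a GPRPair for the 64-bit atomic pseudo.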
bool isBigEndian = DAG.getDataLayout().isBigEndian();
if (isBigEndian)
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
static void ReplaceCMP_SWAP_64Results(SDNode *N,
SmallVectorImpl<SDValue> & Results,
SelectionDAG &DAG) {
assert(N->getValueType(0) == MVT::i64 &&
"AtomicCmpSwap on types less than 64 should be legal");
SDValue Ops[] = {N->getOperand(1),
createGPRPairNode(DAG, N->getOperand(2)),
createGPRPairNode(DAG, N->getOperand(3)),
N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
ARM::CMP_SWAP_64, SDLoc(N),
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
MachineFunction &MF = DAG.getMachineFunction();
MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
bool isBigEndian = DAG.getDataLayout().isBigEndian();
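// Extract the two 32-bit halves of the result from the GPRPair, swapping
// the subregisters on big-endian targets.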
Results.push_back(
DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(
DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
Results.push_back(SDValue(CmpSwap, 2));
}
static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
SelectionDAG &DAG) {
const auto &TLI = DAG.getTargetLoweringInfo();
assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
"Custom lowering is MSVCRT specific!");
SDLoc dl(Op);
SDValue Val = Op.getOperand(0);
MVT Ty = Val->getSimpleValueType(0);
SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
TLI.getPointerTy(DAG.getDataLayout()));
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Val;
Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsZExt = true;
Args.push_back(Entry);
Entry.Node = Exponent;
Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsZExt = true;
Args.push_back(Entry);
Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
// The in-chain to the call is the entry node. If we are emitting a
// tail call, the chain will be mutated if the node has a non-entry input
// chain.
SDValue InChain = DAG.getEntryNode();
SDValue TCChain = InChain;
const auto *F = DAG.getMachineFunction().getFunction();
bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
F->getReturnType() == LCRTy;
if (IsTC)
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
.setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
.setTailCall(IsTC);
std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
// Return the chain (the DAG root) if it is a tail call
return !CI.second.getNode() ? DAG.getRoot() : CI.first;
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
case ISD::SREM: return LowerREM(Op.getNode(), DAG);
case ISD::UREM: return LowerREM(Op.getNode(), DAG);
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::SETCCE: return LowerSETCCE(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
return LowerSDIV(Op, DAG);
case ISD::UDIV:
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
return LowerUDIV(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
return LowerXALUO(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
unsigned Opc = 0;
if (IntNo == Intrinsic::arm_smlald)
Opc = ARMISD::SMLALD;
else if (IntNo == Intrinsic::arm_smlaldx)
Opc = ARMISD::SMLALDX;
else if (IntNo == Intrinsic::arm_smlsld)
Opc = ARMISD::SMLSLD;
else if (IntNo == Intrinsic::arm_smlsldx)
Opc = ARMISD::SMLSLDX;
else
return;
SDLoc dl(N);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
N->getOperand(3),
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
N->getOperand(3),
DAG.getConstant(1, dl, MVT::i32));
SDValue LongMul = DAG.getNode(Opc, dl,
DAG.getVTList(MVT::i32, MVT::i32),
N->getOperand(1), N->getOperand(2),
Lo, Hi);
Results.push_back(LongMul.getValue(0));
Results.push_back(LongMul.getValue(1));
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res;
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
case ISD::READ_REGISTER:
ExpandREAD_REGISTER(N, Results, DAG);
break;
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
case ISD::SREM:
case ISD::UREM:
Res = LowerREM(N, DAG);
break;
case ISD::SDIVREM:
case ISD::UDIVREM:
Res = LowerDivRem(SDValue(N, 0), DAG);
assert(Res.getNumOperands() == 2 && "DivRem needs two values");
Results.push_back(Res.getValue(0));
Results.push_back(Res.getValue(1));
return;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
case ISD::UDIV:
case ISD::SDIV:
assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
Results);
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_64Results(N, Results, DAG);
return;
case ISD::INTRINSIC_WO_CHAIN:
return ReplaceLongIntrinsic(N, Results, DAG);
}
if (Res.getNode())
Results.push_back(Res);
}
//===----------------------------------------------------------------------===//
// ARM Scheduler Hooks
//===----------------------------------------------------------------------===//
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported with SjLj");
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineConstantPool *MCP = MF->getConstantPool();
ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
const Function *F = MF->getFunction();
bool isThumb = Subtarget->isThumb();
bool isThumb2 = Subtarget->isThumb2();
unsigned PCLabelId = AFI->createPICLabelUId();
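// The PC reads 4 bytes ahead of the current instruction in Thumb mode and
// 8 bytes ahead in ARM mode; the constant-pool value compensates for this.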
unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
ARMConstantPoolValue *CPV =
ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
: &ARM::GPRRegClass;
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
MachineMemOperand::MOLoad, 4, 4);
MachineMemOperand *FIMMOSt =
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
MachineMemOperand::MOStore, 4, 4);
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
// Incoming value: jbuf
// ldr.n r5, LCPI1_1
// orr r5, r5, #1
// add r5, pc
// str r5, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
.addConstantPoolIndex(CPI)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
// Set the low bit because of thumb mode.
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(0x01)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
.addReg(NewVReg2, RegState::Kill)
.addImm(PCLabelId);
BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
.addReg(NewVReg3, RegState::Kill)
.addFrameIndex(FI)
.addImm(36) // &jbuf[1] :: pc
.addMemOperand(FIMMOSt)
.add(predOps(ARMCC::AL));
} else if (isThumb) {
// Incoming value: jbuf
// ldr.n r1, LCPI1_4
// add r1, pc
// mov r2, #1
// orrs r1, r2
// add r2, $jbuf, #+4 ; &jbuf[1]
// str r1, [r2]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
.addConstantPoolIndex(CPI)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId);
// Set the low bit because of thumb mode.
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
.addReg(ARM::CPSR, RegState::Define)
.addImm(1)
.add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill)
.add(predOps(ARMCC::AL));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)
.addImm(0)
.addMemOperand(FIMMOSt)
.add(predOps(ARMCC::AL));
} else {
// Incoming value: jbuf
// ldr r1, LCPI1_1
// add r1, pc, r1
// str r1, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
.addConstantPoolIndex(CPI)
.addImm(0)
.addMemOperand(CPMMO)
.add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId)
.add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
.addReg(NewVReg2, RegState::Kill)
.addFrameIndex(FI)
.addImm(36) // &jbuf[1] :: pc
.addMemOperand(FIMMOSt)
.add(predOps(ARMCC::AL));
}
}
void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineFrameInfo &MFI = MF->getFrameInfo();
int FI = MFI.getFunctionContextIndex();
const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
: &ARM::GPRnopcRegClass;
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
if (!BB->isEHPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
for (MachineBasicBlock::iterator
II = BB->begin(), IE = BB->end(); II != IE; ++II) {
if (!II->isEHLabel()) continue;
MCSymbol *Sym = II->getOperand(0).getMCSymbol();
if (!MF->hasCallSiteLandingPad(Sym)) continue;
SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
CallSiteNumToLPad[*CSI].push_back(&*BB);
MaxCSNum = std::max(MaxCSNum, *CSI);
}
break;
}
}
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock*> LPadList;
SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned I = 1; I <= MaxCSNum; ++I) {
SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
for (SmallVectorImpl<MachineBasicBlock*>::iterator
II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
LPadList.push_back(*II);
InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
}
}
assert(!LPadList.empty() &&
"No landing pad destinations for the dispatch jump table!");
// Create the jump table and associated information.
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
// Create the MBBs for the dispatch code.
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
DispatchBB->setIsEHPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
if (Subtarget->isThumb())
trap_opcode = ARM::tTRAP;
else
trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
// Insert the dispatch MBBs into the function.
MF->insert(MF->end(), DispatchBB);
MF->insert(MF->end(), DispContBB);
MF->insert(MF->end(), TrapBB);
// Insert code into the entry block that creates and registers the function
// context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI),
MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
MachineInstrBuilder MIB;
MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered. This can't work if the dispatch block
// is in a Thumb1 function and is linked with ARM code which uses the FP
// registers, as there is no way to preserve the FP registers in Thumb1 mode.
MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd)
.add(predOps(ARMCC::AL));
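// Small landing-pad counts fit in the compare's immediate field; larger
// counts are first materialized with a MOVW/MOVT pair.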
if (NumLPads < 256) {
BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
.addReg(NewVReg1)
.addImm(LPadList.size())
.add(predOps(ARMCC::AL));
} else {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
.addImm(NumLPads & 0xFFFF)
.add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
.addReg(VReg1)
.addImm(NumLPads >> 16)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
.addReg(NewVReg1)
.addReg(VReg2)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
.add(predOps(ARMCC::AL))
.add(condCodeOp());
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg1)
.addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
.addFrameIndex(FI)
.addImm(1)
.addMemOperand(FIMMOLd)
.add(predOps(ARMCC::AL));
if (NumLPads < 256) {
BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
.addReg(NewVReg1)
.addImm(NumLPads)
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
.addReg(VReg1, RegState::Define)
.addConstantPoolIndex(Idx)
.add(predOps(ARMCC::AL));
BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
.addReg(NewVReg1)
.addReg(VReg1)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg1)
.addImm(2)
.add(predOps(ARMCC::AL));
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3)
.add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
.addReg(NewVReg4, RegState::Kill)
.addImm(0)
.addMemOperand(JTMMOLd)
.add(predOps(ARMCC::AL));
unsigned NewVReg6 = NewVReg5;
if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
.addReg(ARM::CPSR, RegState::Define)
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg3)
.add(predOps(ARMCC::AL));
}
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
.addReg(NewVReg6, RegState::Kill)
.addJumpTableIndex(MJTI);
} else {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd)
.add(predOps(ARMCC::AL));
if (NumLPads < 256) {
BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
.addReg(NewVReg1)
.addImm(NumLPads)
.add(predOps(ARMCC::AL));
} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
.addImm(NumLPads & 0xFFFF)
.add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
.addReg(VReg1)
.addImm(NumLPads >> 16)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
.addReg(NewVReg1)
.addReg(VReg2)
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
.addReg(VReg1, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
.addReg(NewVReg1)
.addReg(VReg1, RegState::Kill)
.add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
.add(predOps(ARMCC::AL))
.add(condCodeOp());
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI)
.add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg4)
.addImm(0)
.addMemOperand(JTMMOLd)
.add(predOps(ARMCC::AL));
if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
.addReg(NewVReg5, RegState::Kill)
.addReg(NewVReg4)
.addJumpTableIndex(MJTI);
} else {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
.addReg(NewVReg5, RegState::Kill)
.addJumpTableIndex(MJTI);
}
}
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
for (std::vector<MachineBasicBlock*>::iterator
I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
MachineBasicBlock *CurMBB = *I;
if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (MachineBasicBlock *BB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
BB->succ_end());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isEHPad()) {
BB->removeSuccessor(SMBB);
MBBLPads.push_back(SMBB);
}
}
BB->addSuccessor(DispatchBB, BranchProbability::getZero());
BB->normalizeSuccProbs();
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents the compiler
// from moving instructions to before the EH block, where they would never
// be executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
OI = II->operands_begin(), OE = II->operands_end();
OI != OE; ++OI) {
if (!OI->isReg()) continue;
DefRegs[OI->getReg()] = true;
}
MachineInstrBuilder MIB(*MF, &*II);
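// Only add registers the current mode can actually use: Thumb1 is limited
// to the low registers (tGPR), Thumb2 to tGPR/hGPR, and ARM mode to GPR.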
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
if (Subtarget->isThumb2() &&
!ARM::tGPRRegClass.contains(Reg) &&
!ARM::hGPRRegClass.contains(Reg))
continue;
if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
continue;
if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
continue;
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
for (SmallVectorImpl<MachineBasicBlock*>::iterator
I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
(*I)->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
}
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I != Succ)
return *I;
llvm_unreachable("Expecting a BB with two successors!");
}
/// Return the load opcode for a given load size. If load size >= 8,
/// a NEON opcode will be returned.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
if (LdSize >= 8)
return LdSize == 16 ? ARM::VLD1q32wb_fixed
: LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
if (IsThumb1)
return LdSize == 4 ? ARM::tLDRi
: LdSize == 2 ? ARM::tLDRHi
: LdSize == 1 ? ARM::tLDRBi : 0;
if (IsThumb2)
return LdSize == 4 ? ARM::t2LDR_POST
: LdSize == 2 ? ARM::t2LDRH_POST
: LdSize == 1 ? ARM::t2LDRB_POST : 0;
return LdSize == 4 ? ARM::LDR_POST_IMM
: LdSize == 2 ? ARM::LDRH_POST
: LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
}
/// Return the store opcode for a given store size. If store size >= 8,
/// a NEON opcode will be returned.
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
if (StSize >= 8)
return StSize == 16 ? ARM::VST1q32wb_fixed
: StSize == 8 ? ARM::VST1d32wb_fixed : 0;
if (IsThumb1)
return StSize == 4 ? ARM::tSTRi
: StSize == 2 ? ARM::tSTRHi
: StSize == 1 ? ARM::tSTRBi : 0;
if (IsThumb2)
return StSize == 4 ? ARM::t2STR_POST
: StSize == 2 ? ARM::t2STRH_POST
: StSize == 1 ? ARM::t2STRB_POST : 0;
return StSize == 4 ? ARM::STR_POST_IMM
: StSize == 2 ? ARM::STRH_POST
: StSize == 1 ? ARM::STRB_POST_IMM : 0;
}
/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned LdSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
assert(LdOpc != 0 && "Should have a load opcode");
if (LdSize >= 8) {
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrOut, RegState::Define)
.addReg(AddrIn)
.addImm(0)
.add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// load + update AddrIn
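// Thumb1 has no post-indexed load, so emit a plain load followed by a
// tADDi8 that advances the address by LdSize.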
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrIn)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
.add(t1CondCodeOp())
.addReg(AddrIn)
.addImm(LdSize)
.add(predOps(ARMCC::AL));
} else if (IsThumb2) {
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrOut, RegState::Define)
.addReg(AddrIn)
.addImm(LdSize)
.add(predOps(ARMCC::AL));
} else { // arm
BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
.addReg(AddrOut, RegState::Define)
.addReg(AddrIn)
.addReg(0)
.addImm(LdSize)
.add(predOps(ARMCC::AL));
}
}
/// Emit a post-increment store operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
const TargetInstrInfo *TII, const DebugLoc &dl,
unsigned StSize, unsigned Data, unsigned AddrIn,
unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
assert(StOpc != 0 && "Should have a store opcode");
if (StSize >= 8) {
BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
.addReg(AddrIn)
.addImm(0)
.addReg(Data)
.add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// store + update AddrIn
BuildMI(*BB, Pos, dl, TII->get(StOpc))
.addReg(Data)
.addReg(AddrIn)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
.add(t1CondCodeOp())
.addReg(AddrIn)
.addImm(StSize)
.add(predOps(ARMCC::AL));
} else if (IsThumb2) {
BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
.addReg(Data)
.addReg(AddrIn)
.addImm(StSize)
.add(predOps(ARMCC::AL));
} else { // arm
BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
.addReg(Data)
.addReg(AddrIn)
.addReg(0)
.addImm(StSize)
.add(predOps(ARMCC::AL));
}
}
MachineBasicBlock *
ARMTargetLowering::EmitStructByval(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This pseudo instruction has 4 operands: dst, src, size, alignment.
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI.getOperand(0).getReg();
unsigned src = MI.getOperand(1).getReg();
unsigned SizeVal = MI.getOperand(2).getImm();
unsigned Align = MI.getOperand(3).getImm();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnitSize = 0;
const TargetRegisterClass *TRC = nullptr;
const TargetRegisterClass *VecTRC = nullptr;
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
bool IsThumb = Subtarget->isThumb();
if (Align & 1) {
UnitSize = 1;
} else if (Align & 2) {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
}
// If NEON can't be used, fall back to a 4-byte unit size.
if (UnitSize == 0)
UnitSize = 4;
}
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
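// For NEON copies, a 16-byte unit needs a D-register pair (one Q register
// viewed as two D registers); an 8-byte unit needs a single D register.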
if (IsNeon)
VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
: UnitSize == 8 ? &ARM::DPRRegClass
: nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
// Use LDR and STR to copy.
// [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
// [destOut] = STR_POST(scratch, destIn, UnitSize)
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
unsigned scratch = MRI.createVirtualRegister(TRC);
emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
// Expand the pseudo op to a loop.
// thisMBB:
// ...
// movw varEnd, # --> with thumb2
// movt varEnd, #
// ldrcp varEnd, idx --> without thumb2
// fallthrough --> loopMBB
// loopMBB:
// PHI varPhi, varEnd, varLoop
// PHI srcPhi, src, srcLoop
// PHI destPhi, dst, destLoop
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
// subs varLoop, varPhi, #UnitSize
// bne loopMBB
// fallthrough --> exitMBB
// exitMBB:
// epilogue to handle left-over bytes
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
.addImm(LoopSize & 0xFFFF)
.add(predOps(ARMCC::AL));
if ((LoopSize & 0xFFFF0000) != 0)
BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
.addReg(Vtmp)
.addImm(LoopSize >> 16)
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb)
BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.add(predOps(ARMCC::AL));
else
BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0)
.add(predOps(ARMCC::AL));
}
BB->addSuccessor(loopMBB);
// Generate the loop body:
// varPhi = PHI(varLoop, varEnd)
// srcPhi = PHI(srcLoop, src)
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
unsigned varLoop = MRI.createVirtualRegister(TRC);
unsigned varPhi = MRI.createVirtualRegister(TRC);
unsigned srcLoop = MRI.createVirtualRegister(TRC);
unsigned srcPhi = MRI.createVirtualRegister(TRC);
unsigned destLoop = MRI.createVirtualRegister(TRC);
unsigned destPhi = MRI.createVirtualRegister(TRC);
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
.addReg(varEnd).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
.addReg(srcLoop).addMBB(loopMBB)
.addReg(src).addMBB(entryBB);
BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
.addReg(destLoop).addMBB(loopMBB)
.addReg(dest).addMBB(entryBB);
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
IsThumb1, IsThumb2);
emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
if (IsThumb1) {
BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
.add(t1CondCodeOp())
.addReg(varPhi)
.addImm(UnitSize)
.add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
MIB.addReg(varPhi)
.addImm(UnitSize)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
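// condCodeOp() added a dummy cc_out operand; redirect it to CPSR and mark
// it as a def so this SUB sets the flags tested by the branch below.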
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
}
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// Add epilogue to handle BytesLeft.
BB = exitMBB;
auto StartOfExit = exitMBB->begin();
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
unsigned scratch = MRI.createVirtualRegister(TRC);
emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
IsThumb1, IsThumb2);
emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
MI.eraseFromParent(); // The instruction is gone now.
return BB;
}
MachineBasicBlock *
ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const TargetMachine &TM = getTargetMachine();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
assert(Subtarget->isTargetWindows() &&
"__chkstk is only supported on Windows");
assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
// __chkstk takes the number of words to allocate on the stack in R4, and
// returns the stack adjustment in number of bytes in R4. This will not
// clobber any other registers (other than the obvious lr).
//
// Although, technically, IP should be considered a register which may be
// clobbered, the call itself will not touch it. Windows on ARM is a pure
// thumb-2 environment, so there is no interworking required. As a result, we
// do not expect a veneer to be emitted by the linker, clobbering IP.
//
// Each module receives its own copy of __chkstk, so no import thunk is
// required, again, ensuring that IP is not clobbered.
//
// Finally, although some linkers may theoretically provide a trampoline for
// out of range calls (which is quite common due to a 32M range limitation of
// branches for Thumb), we can generate the long-call version via
// -mcmodel=large, alleviating the need for the trampoline which may clobber
// IP.
switch (TM.getCodeModel()) {
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
.add(predOps(ARMCC::AL))
.addExternalSymbol("__chkstk")
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
.addReg(ARM::R12,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
case CodeModel::Large:
case CodeModel::JITDefault: {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
.add(predOps(ARMCC::AL))
.addReg(Reg, RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Kill)
.addReg(ARM::R4, RegState::Implicit | RegState::Define)
.addReg(ARM::R12,
RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
}
}
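// __chkstk leaves the number of bytes to allocate in R4; subtract it from
// SP to actually allocate the stack space.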
BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
.addReg(ARM::SP, RegState::Kill)
.addReg(ARM::R4, RegState::Kill)
.setMIFlags(MachineInstr::FrameSetup)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
MI.eraseFromParent();
return MBB;
}
MachineBasicBlock *
ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
MF->insert(++MBB->getIterator(), ContBB);
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
ContBB->transferSuccessorsAndUpdatePHIs(MBB);
MBB->addSuccessor(ContBB);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
MF->push_back(TrapBB);
MBB->addSuccessor(TrapBB);
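// Compare the checked operand against zero and branch to the trap block
// when it is zero; otherwise execution continues in ContBB.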
BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
.addReg(MI.getOperand(0).getReg())
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::EQ)
.addReg(ARM::CPSR);
MI.eraseFromParent();
return ContBB;
}
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI.getOpcode()) {
default: {
MI.print(errs());
llvm_unreachable("Unexpected instr type to insert");
}
// Thumb1 post-indexed loads are really just single-register LDMs.
case ARM::tLDR_postidx: {
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
.add(MI.getOperand(1)) // Rn_wb
.add(MI.getOperand(2)) // Rn
.add(MI.getOperand(3)) // PredImm
.add(MI.getOperand(4)) // PredReg
.add(MI.getOperand(0)); // Rt
MI.eraseFromParent();
return BB;
}
// The Thumb2 pre-indexed stores have the same MI operands, they just
// define them differently in the .td files from the isel patterns, so
// they need pseudos.
case ARM::t2STR_preidx:
MI.setDesc(TII->get(ARM::t2STR_PRE));
return BB;
case ARM::t2STRB_preidx:
MI.setDesc(TII->get(ARM::t2STRB_PRE));
return BB;
case ARM::t2STRH_preidx:
MI.setDesc(TII->get(ARM::t2STRH_PRE));
return BB;
case ARM::STRi_preidx:
case ARM::STRBi_preidx: {
unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
: ARM::STRB_PRE_IMM;
// Decode the offset.
unsigned Offset = MI.getOperand(4).getImm();
bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
Offset = ARM_AM::getAM2Offset(Offset);
if (isSub)
Offset = -Offset;
MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
.add(MI.getOperand(0)) // Rn_wb
.add(MI.getOperand(1)) // Rt
.add(MI.getOperand(2)) // Rn
.addImm(Offset) // offset (skip GPR==zero_reg)
.add(MI.getOperand(5)) // pred
.add(MI.getOperand(6))
.addMemOperand(MMO);
MI.eraseFromParent();
return BB;
}
case ARM::STRr_preidx:
case ARM::STRBr_preidx:
case ARM::STRH_preidx: {
unsigned NewOpc;
switch (MI.getOpcode()) {
default: llvm_unreachable("unexpected opcode!");
case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
for (unsigned i = 0; i < MI.getNumOperands(); ++i)
MIB.add(MI.getOperand(i));
MI.eraseFromParent();
return BB;
}
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
BuildMI(BB, dl, TII->get(ARM::tBcc))
.addMBB(sinkMBB)
.addImm(MI.getOperand(3).getImm())
.addReg(MI.getOperand(4).getReg());
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
.addReg(MI.getOperand(1).getReg())
.addMBB(copy0MBB)
.addReg(MI.getOperand(2).getReg())
.addMBB(thisMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
case ARM::BCCi64:
case ARM::BCCZi64: {
// If there is an unconditional branch to the other successor, remove it.
BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
// Compare both parts that make up the double comparison separately for
// equality.
bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
unsigned LHS1 = MI.getOperand(1).getReg();
unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS1)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
unsigned RHS1 = MI.getOperand(3).getReg();
unsigned RHS2 = MI.getOperand(4).getReg();
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS1)
.addReg(RHS1)
.add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS2).addReg(RHS2)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
}
MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
if (MI.getOperand(0).getImm() == ARMCC::NE)
std::swap(destMBB, exitMBB);
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
if (isThumb2)
BuildMI(BB, dl, TII->get(ARM::t2B))
.addMBB(exitMBB)
.add(predOps(ARMCC::AL));
else
BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
return BB;
case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
case ARM::ABS:
case ARM::t2ABS: {
// To insert an ABS instruction, we have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// source vreg to test against 0, the destination vreg to set,
// the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
// It transforms
// V1 = ABS V0
// into
// V2 = MOVS V0
// BCC (branch to SinkBB if V0 >= 0)
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
// SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator BBI = ++BB->getIterator();
MachineFunction *Fn = BB->getParent();
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
Fn->insert(BBI, RSBBB);
Fn->insert(BBI, SinkBB);
unsigned int ABSSrcReg = MI.getOperand(1).getReg();
unsigned int ABSDstReg = MI.getOperand(0).getReg();
bool ABSSrcKill = MI.getOperand(1).isKill();
bool isThumb2 = Subtarget->isThumb2();
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode the S bit must not be specified if the source register is
// SP or PC, or if the destination register is SP, so restrict the register
// class.
unsigned NewRsbDstReg =
MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
// Transfer the remainder of BB and its successor edges to sinkMBB.
SinkBB->splice(SinkBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RSBBB);
BB->addSuccessor(SinkBB);
// fall through to SinkMBB
RSBBB->addSuccessor(SinkBB);
// insert a cmp at the end of BB
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(ABSSrcReg)
.addImm(0)
.add(predOps(ARMCC::AL));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
// insert rsbri in RSBBB
// Note: BCC and rsbri will be converted into predicated rsbmi
// by the if-conversion pass.
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
.addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
.addImm(0)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
// insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
.addReg(NewRsbDstReg).addMBB(RSBBB)
.addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
MI.eraseFromParent();
// return last added BB
return SinkBB;
}
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
case ARM::WIN__CHKSTK:
return EmitLowered__chkstk(MI, BB);
case ARM::WIN__DBZCHK:
return EmitLowered__dbzchk(MI, BB);
}
}
/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
/// when it is expanded into LDM/STM. This is done as a post-isel lowering
/// instead of as a custom inserter because we need the use list from the SDNode.
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
MachineInstr &MI, const SDNode *Node) {
bool isThumb1 = Subtarget->isThumb1Only();
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineInstrBuilder MIB(*MF, MI);
// If the new dst/src is unused, mark it as dead.
if (!Node->hasAnyUseOfValue(0)) {
MI.getOperand(0).setIsDead(true);
}
if (!Node->hasAnyUseOfValue(1)) {
MI.getOperand(1).setIsDead(true);
}
// The MEMCPY both defines and kills the scratch registers.
for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass);
MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
if (MI.getOpcode() == ARM::MEMCPY) {
attachMEMCPYScratchRegs(Subtarget, MI, Node);
return;
}
const MCInstrDesc *MCID = &MI.getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
// register to CPSR, and remove the redundant implicit def.
//
// e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
unsigned ccOutIdx;
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() ==
MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
&& "converted opcode should be the same except for cc_out"
" (and, on Thumb1, pred)");
MI.setDesc(*MCID);
// Add the optional cc_out operand
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
// On Thumb1, move all input operands to the end, then add the predicate
if (Subtarget->isThumb1Only()) {
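// Thumb1 encodings place cc_out right after the destination, so rotate
// the source operands to sit behind the cc_out we just appended.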
for (unsigned c = MCID->getNumOperands() - 4; c--;) {
MI.addOperand(MI.getOperand(1));
MI.RemoveOperand(1);
}
// Restore the ties
for (unsigned i = MI.getNumOperands(); i--;) {
const MachineOperand& op = MI.getOperand(i);
if (op.isReg() && op.isUse()) {
int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
if (DefIdx != -1)
MI.tieOperands(DefIdx, i);
}
}
MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
ccOutIdx = 1;
} else
ccOutIdx = MCID->getNumOperands() - 1;
} else
ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
// Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
++i) {
const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
definesCPSR = true;
if (MO.isDead())
deadCPSR = true;
MI.RemoveOperand(i);
break;
}
}
if (!definesCPSR) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
if (deadCPSR) {
assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
// Thumb1 instructions must have the S bit even if the CPSR is dead.
if (!Subtarget->isThumb1Only())
return;
}
// If this instruction was defined with an optional CPSR def and its dag node
// had a live implicit CPSR def, then activate the optional CPSR def.
MachineOperand &MO = MI.getOperand(ccOutIdx);
MO.setReg(ARM::CPSR);
MO.setIsDef(true);
}
//===----------------------------------------------------------------------===//
// ARM Optimization Hooks
//===----------------------------------------------------------------------===//
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
}
// Return true if N is conditionally 0 or all ones.
// Detects these expressions where cc is an i1 value:
//
// (select cc 0, y) [AllOnes=0]
// (select cc y, 0) [AllOnes=0]
// (zext cc) [AllOnes=0]
// (sext cc) [AllOnes=0/1]
// (select cc -1, y) [AllOnes=1]
// (select cc y, -1) [AllOnes=1]
//
// Invert is set when N is the null/all-ones constant for the CC == false case.
// OtherOp is set to the alternative value of N.
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
SDValue &CC, bool &Invert,
SDValue &OtherOp,
SelectionDAG &DAG) {
switch (N->getOpcode()) {
default: return false;
case ISD::SELECT: {
CC = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
if (isZeroOrAllOnes(N1, AllOnes)) {
Invert = false;
OtherOp = N2;
return true;
}
if (isZeroOrAllOnes(N2, AllOnes)) {
Invert = true;
OtherOp = N1;
return true;
}
return false;
}
case ISD::ZERO_EXTEND:
// (zext cc) can never be the all ones value.
if (AllOnes)
return false;
LLVM_FALLTHROUGH;
case ISD::SIGN_EXTEND: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
CC = N->getOperand(0);
if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
return false;
Invert = !AllOnes;
if (AllOnes)
// When looking for an AllOnes constant, N is an sext, and the 'other'
// value is 0.
OtherOp = DAG.getConstant(0, dl, VT);
else if (N->getOpcode() == ISD::ZERO_EXTEND)
// When looking for a 0 constant, N can be zext or sext.
OtherOp = DAG.getConstant(1, dl, VT);
else
OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
VT);
return true;
}
}
}
// Combine a constant select operand into its use:
//
// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
//
// The transform is rejected if the select doesn't have a constant operand that
// is null, or all ones when AllOnes is set.
//
// Also recognize sext/zext from i1:
//
// (add (zext cc), x) -> (select cc (add x, 1), x)
// (add (sext cc), x) -> (select cc (add x, -1), x)
//
// These transformations eventually create predicated instructions.
//
// @param N The node to transform.
// @param Slct The N operand that is a select.
// @param OtherOp The other N operand (x above).
// @param DCI Context.
// @param AllOnes Require the select constant to be all ones instead of null.
// @returns The new node, or SDValue() on failure.
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
TargetLowering::DAGCombinerInfo &DCI,
bool AllOnes = false) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDValue NonConstantVal;
SDValue CCOp;
bool SwapSelectOps;
if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
NonConstantVal, DAG))
return SDValue();
// Slct is now known to be the desired identity constant when CC is true.
SDValue TrueVal = OtherOp;
SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
OtherOp, NonConstantVal);
// Unless SwapSelectOps says CC should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
CCOp, TrueVal, FalseVal);
}
// Attempt combineSelectAndUse on each operand of a commutative operator N.
static
SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (N0.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
return Result;
if (N1.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
return Result;
return SDValue();
}
static bool IsVUZPShuffleNode(SDNode *N) {
// VUZP shuffle node.
if (N->getOpcode() == ARMISD::VUZP)
return true;
// "VUZP" on i32 is an alias for VTRN.
if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
return true;
return false;
}
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Look for ADD(VUZP.0, VUZP.1).
if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
N0 == N1)
return SDValue();
// Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
if (!N->getValueType(0).is64BitVector())
return SDValue();
// Generate vpadd.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
SDNode *Unzip = N0.getNode();
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
TLI.getPointerTy(DAG.getDataLayout())));
Ops.push_back(Unzip->getOperand(0));
Ops.push_back(Unzip->getOperand(1));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
}
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Check for two extended operands.
if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
N1.getOpcode() == ISD::SIGN_EXTEND) &&
!(N0.getOpcode() == ISD::ZERO_EXTEND &&
N1.getOpcode() == ISD::ZERO_EXTEND))
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
// Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
N00 == N10)
return SDValue();
// We only recognize Q register paddl here; this can't be reached until
// after type legalization.
if (!N00.getValueType().is64BitVector() ||
!N0.getValueType().is128BitVector())
return SDValue();
// Generate vpaddl.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
// Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
unsigned Opcode;
if (N0.getOpcode() == ISD::SIGN_EXTEND)
Opcode = Intrinsic::arm_neon_vpaddls;
else
Opcode = Intrinsic::arm_neon_vpaddlu;
Ops.push_back(DAG.getConstant(Opcode, dl,
TLI.getPointerTy(DAG.getDataLayout())));
EVT ElemTy = N00.getValueType().getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
N00.getOperand(0), N00.getOperand(1));
Ops.push_back(Concat);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
}
// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
// much easier to match.
static SDValue
AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Only perform this optimization after legalization and when NEON is
// available. We also expect both operands to be BUILD_VECTORs.
if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
|| N0.getOpcode() != ISD::BUILD_VECTOR
|| N1.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// Check output type since VPADDL operand elements can only be 8, 16, or 32.
EVT VT = N->getValueType(0);
if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
return SDValue();
// Check that the vector operands are of the right form.
// N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT operands,
// where N is the size of the formed vector.
// Each EXTRACT_VECTOR_ELT should have the same input vector and an odd or
// even index such that we have a pairwise add pattern.
// Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue Vec = N0->getOperand(0)->getOperand(0);
SDNode *V = Vec.getNode();
unsigned nextIndex = 0;
// For each pair of BUILD_VECTOR operands of the ADD, check whether each of
// their operands is an EXTRACT_VECTOR_ELT from the same vector with the
// appropriate index.
for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
&& N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue ExtVec0 = N0->getOperand(i);
SDValue ExtVec1 = N1->getOperand(i);
// First operand is the vector, verify it's the same.
if (V != ExtVec0->getOperand(0).getNode() ||
V != ExtVec1->getOperand(0).getNode())
return SDValue();
// Second is the constant, verify it's correct.
ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
// For the constants, we want to see all the even or all the odd indices.
if (!C0 || !C1 || C0->getZExtValue() != nextIndex
|| C1->getZExtValue() != nextIndex+1)
return SDValue();
// Increment index.
nextIndex+=2;
} else
return SDValue();
}
// Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
// we're using the entire input vector, otherwise there's a size/legality
// mismatch somewhere.
if (nextIndex != Vec.getValueType().getVectorNumElements() ||
Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
return SDValue();
// Create VPADDL node.
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(N);
// Build operand list.
SmallVector<SDValue, 8> Ops;
Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
TLI.getPointerTy(DAG.getDataLayout())));
// Input is the vector.
Ops.push_back(Vec);
// Get widened type and narrowed type.
MVT widenType;
unsigned numElem = VT.getVectorNumElements();
EVT inputLaneType = Vec.getValueType().getVectorElementType();
switch (inputLaneType.getSimpleVT().SimpleTy) {
case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
default:
llvm_unreachable("Invalid vector element type for padd optimization.");
}
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
return DAG.getNode(ExtOp, dl, VT, tmp);
}
static SDValue findMUL_LOHI(SDValue V) {
if (V->getOpcode() == ISD::UMUL_LOHI ||
V->getOpcode() == ISD::SMUL_LOHI)
return V;
return SDValue();
}
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb()) {
if (!Subtarget->hasDSP())
return SDValue();
} else if (!Subtarget->hasV5TEOps())
return SDValue();
// SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
// accumulate the product into a 64-bit value. The 16-bit values will
// be sign extended somehow or SRA'd into 32-bit values
// (addc (adde (mul 16bit, 16bit), lo), hi)
SDValue Mul = AddcNode->getOperand(0);
SDValue Lo = AddcNode->getOperand(1);
if (Mul.getOpcode() != ISD::MUL) {
Lo = AddcNode->getOperand(0);
Mul = AddcNode->getOperand(1);
if (Mul.getOpcode() != ISD::MUL)
return SDValue();
}
SDValue SRA = AddeNode->getOperand(0);
SDValue Hi = AddeNode->getOperand(1);
if (SRA.getOpcode() != ISD::SRA) {
SRA = AddeNode->getOperand(1);
Hi = AddeNode->getOperand(0);
if (SRA.getOpcode() != ISD::SRA)
return SDValue();
}
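// An arithmetic shift right by 31 extracts the sign bits, i.e. the SRA
// reproduces the high half of the sign-extended 32-bit product.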
if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
if (Const->getZExtValue() != 31)
return SDValue();
} else
return SDValue();
if (SRA.getOperand(0) != Mul)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(AddcNode);
unsigned Opcode = 0;
SDValue Op0;
SDValue Op1;
if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
Opcode = ARMISD::SMLALBB;
Op0 = Mul.getOperand(0);
Op1 = Mul.getOperand(1);
} else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
Opcode = ARMISD::SMLALBT;
Op0 = Mul.getOperand(0);
Op1 = Mul.getOperand(1).getOperand(0);
} else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
Opcode = ARMISD::SMLALTB;
Op0 = Mul.getOperand(0).getOperand(0);
Op1 = Mul.getOperand(1);
} else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
Opcode = ARMISD::SMLALTT;
Op0 = Mul->getOperand(0).getOperand(0);
Op1 = Mul->getOperand(1).getOperand(0);
}
if (!Op0 || !Op1)
return SDValue();
SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
Op0, Op1, Lo, Hi);
// Replace the ADD nodes' uses with the MLA node's values.
SDValue HiMLALResult(SMLAL.getNode(), 1);
SDValue LoMLALResult(SMLAL.getNode(), 0);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
// Return original node to notify the driver to stop replacing.
SDValue resNode(AddcNode, 0);
return resNode;
}
static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Look for multiply add opportunities.
// The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
// each add node consumes a value from ISD::UMUL_LOHI and there is
// a glue link from the first add to the second add.
// If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
// an S/UMLAL instruction.
//
//                  UMUL_LOHI
//                 / :lo    \ :hi
//                /          \          [no multiline comment]
//      loAdd -> ADDC        |
//                  \ :glue  /
//                   \      /
//                    ADDE <- hiAdd
//
assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
assert(AddeNode->getNumOperands() == 3 &&
AddeNode->getOperand(2).getValueType() == MVT::i32 &&
"ADDE node has the wrong inputs");
// Check that we have a glued ADDC node.
SDNode* AddcNode = AddeNode->getOperand(2).getNode();
if (AddcNode->getOpcode() != ARMISD::ADDC)
return SDValue();
SDValue AddcOp0 = AddcNode->getOperand(0);
SDValue AddcOp1 = AddcNode->getOperand(1);
// Check if the two operands are from the same mul_lohi node.
if (AddcOp0.getNode() == AddcOp1.getNode())
return SDValue();
assert(AddcNode->getNumValues() == 2 &&
AddcNode->getValueType(0) == MVT::i32 &&
"Expect ADDC with two result values. First: i32");
// Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
// may be an SMLAL which multiplies two 16-bit values.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
// Check for the triangle shape.
SDValue AddeOp0 = AddeNode->getOperand(0);
SDValue AddeOp1 = AddeNode->getOperand(1);
// Make sure that the ADDE operands are not coming from the same node.
if (AddeOp0.getNode() == AddeOp1.getNode())
return SDValue();
// Find the MUL_LOHI node walking up ADDE's operands.
bool IsLeftOperandMUL = false;
SDValue MULOp = findMUL_LOHI(AddeOp0);
if (MULOp == SDValue())
MULOp = findMUL_LOHI(AddeOp1);
else
IsLeftOperandMUL = true;
if (MULOp == SDValue())
return SDValue();
// Figure out the right opcode.
unsigned Opc = MULOp->getOpcode();
unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
// Figure out the high and low input values to the MLAL node.
SDValue* HiAdd = nullptr;
SDValue* LoMul = nullptr;
SDValue* LowAdd = nullptr;
// Ensure that the ADDE uses the high result of ISD::SMUL_LOHI.
if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
return SDValue();
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
else
HiAdd = &AddeOp0;
// Ensure that LoMul and LowAdd are taken from the correct ISD::SMUL_LOHI
// node whose low result is fed to the ADDC we are checking.
if (AddcOp0 == MULOp.getValue(0)) {
LoMul = &AddcOp0;
LowAdd = &AddcOp1;
}
if (AddcOp1 == MULOp.getValue(0)) {
LoMul = &AddcOp1;
LowAdd = &AddcOp0;
}
if (!LoMul)
return SDValue();
// Create the merged node.
SelectionDAG &DAG = DCI.DAG;
// Build operand list.
SmallVector<SDValue, 8> Ops;
Ops.push_back(LoMul->getOperand(0));
Ops.push_back(LoMul->getOperand(1));
Ops.push_back(*LowAdd);
Ops.push_back(*HiAdd);
SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADD nodes' uses with the MLA node's values.
SDValue HiMLALResult(MLALNode.getNode(), 1);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
SDValue LoMLALResult(MLALNode.getNode(), 0);
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
// Return original node to notify the driver to stop replacing.
return SDValue(AddeNode, 0);
}
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// UMAAL is similar to UMLAL except that it adds two unsigned values.
// While trying to combine for the other MLAL nodes, first search for the
// chance to use UMAAL. Check if Addc uses a node which has already
// been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
// as the addend, and it's handled in PerformUMLALCombine.
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
// Check that we have a glued ADDC node.
SDNode* AddcNode = AddeNode->getOperand(2).getNode();
if (AddcNode->getOpcode() != ARMISD::ADDC)
return SDValue();
// Find the converted UMAAL or quit if it doesn't exist.
SDNode *UmlalNode = nullptr;
SDValue AddHi;
if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
UmlalNode = AddcNode->getOperand(0).getNode();
AddHi = AddcNode->getOperand(1);
} else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
UmlalNode = AddcNode->getOperand(1).getNode();
AddHi = AddcNode->getOperand(0);
} else {
return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
}
// The ADDC should be glued to an ADDE node, which uses the same UMLAL
// result as the ADDC, together with a zero operand.
if (!isNullConstant(UmlalNode->getOperand(3)))
return SDValue();
if ((isNullConstant(AddeNode->getOperand(0)) &&
AddeNode->getOperand(1).getNode() == UmlalNode) ||
(AddeNode->getOperand(0).getNode() == UmlalNode &&
isNullConstant(AddeNode->getOperand(1)))) {
SelectionDAG &DAG = DCI.DAG;
SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
UmlalNode->getOperand(2), AddHi };
SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADD nodes' uses with the UMAAL node's values.
DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
// Return original node to notify the driver to stop replacing.
return SDValue(AddeNode, 0);
}
return SDValue();
}
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
return SDValue();
// Check that we have a pair of ADDC and ADDE as operands.
// Both addends of the ADDE must be zero.
SDNode* AddcNode = N->getOperand(2).getNode();
SDNode* AddeNode = N->getOperand(3).getNode();
if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
(AddeNode->getOpcode() == ARMISD::ADDE) &&
isNullConstant(AddeNode->getOperand(0)) &&
isNullConstant(AddeNode->getOperand(1)) &&
(AddeNode->getOperand(2).getNode() == AddcNode))
return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
DAG.getVTList(MVT::i32, MVT::i32),
{N->getOperand(0), N->getOperand(1),
AddcNode->getOperand(0), AddcNode->getOperand(1)});
else
return SDValue();
}
static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb1Only()) {
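// Thumb1 immediates are unsigned, so fold an add of a negative constant
// into a sub of its negation (and vice versa). INT_MIN is excluded
// because it cannot be negated in 32 bits.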
SDValue RHS = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
int32_t imm = C->getSExtValue();
if (imm < 0 && imm > INT_MIN) {
SDLoc DL(N);
RHS = DAG.getConstant(-imm, DL, MVT::i32);
unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
: ARMISD::ADDC;
return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
}
}
}
return SDValue();
}
static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb1Only()) {
SDValue RHS = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
int64_t imm = C->getSExtValue();
if (imm < 0) {
SDLoc DL(N);
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already
// accounts for part of the negation.
RHS = DAG.getConstant(~imm, DL, MVT::i32);
unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
: ARMISD::ADDE;
return DAG.getNode(Opcode, DL, N->getVTList(),
N->getOperand(0), RHS, N->getOperand(2));
}
}
}
return SDValue();
}
/// PerformADDECombine - Target-specific dag combine transform from
/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
static SDValue PerformADDECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Only ARM and Thumb2 support UMLAL/SMLAL.
if (Subtarget->isThumb1Only())
return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget){
// Attempt to create vpadd for this add.
if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
return Result;
// Attempt to create vpaddl for this add.
if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
return Result;
if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
Subtarget))
return Result;
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
return Result;
return SDValue();
}
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;
// If that didn't work, try again with the operands commuted.
return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
if (N1.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
return Result;
return SDValue();
}
/// PerformVMULCombine
/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
/// special multiplier accumulator forwarding.
/// vmul d3, d0, d2
/// vmla d3, d1, d2
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
// However, for (A + B) * (A + B),
// vadd d2, d0, d1
// vmul d3, d0, d2
// vmla d3, d1, d2
// is slower than
// vadd d2, d0, d1
// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasVMLxForwarding())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
Opcode != ISD::FADD && Opcode != ISD::FSUB) {
Opcode = N1.getOpcode();
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
Opcode != ISD::FADD && Opcode != ISD::FSUB)
return SDValue();
std::swap(N0, N1);
}
if (N0 == N1)
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
return DAG.getNode(Opcode, DL, VT,
DAG.getNode(ISD::MUL, DL, VT, N00, N1),
DAG.getNode(ISD::MUL, DL, VT, N01, N1));
}
static SDValue PerformMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
if (Subtarget->isThumb1Only())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
EVT VT = N->getValueType(0);
if (VT.is64BitVector() || VT.is128BitVector())
return PerformVMULCombine(N, DCI, Subtarget);
if (VT != MVT::i32)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
SDLoc DL(N);
SDValue Res;
MulAmt >>= ShiftAmt;
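// e.g. MulAmt == 20 (0b10100): ShiftAmt == 2 and the reduced MulAmt is
// 5 == 2^2 + 1, so we emit (shl (add x, (shl x, 2)), 2).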
if (MulAmt >= 0) {
if (isPowerOf2_32(MulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
Res = DAG.getNode(ISD::ADD, DL, VT,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmt - 1), DL,
MVT::i32)));
} else if (isPowerOf2_32(MulAmt + 1)) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmt + 1), DL,
MVT::i32)),
V);
} else
return SDValue();
} else {
uint64_t MulAmtAbs = -MulAmt;
if (isPowerOf2_32(MulAmtAbs + 1)) {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
Res = DAG.getNode(ISD::SUB, DL, VT,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
MVT::i32)));
} else if (isPowerOf2_32(MulAmtAbs - 1)) {
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
Res = DAG.getNode(ISD::ADD, DL, VT,
V,
DAG.getNode(ISD::SHL, DL, VT,
V,
DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
MVT::i32)));
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, MVT::i32), Res);
} else
return SDValue();
}
if (ShiftAmt != 0)
Res = DAG.getNode(ISD::SHL, DL, VT,
Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
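// Illustrative sketch (not part of the upstream source): PerformMULCombine
// factors a constant multiplier as (2^N +/- 1) << ShiftAmt and rebuilds the
// product from a shift plus an add or sub. A scalar model of the positive
// cases; the helper name is ours:
static inline unsigned MulByConstModel(unsigned X, unsigned ShiftAmt,
                                       unsigned N, bool PlusOne) {
  // MulAmt == (2^N + 1) << ShiftAmt  ->  ((X << N) + X) << ShiftAmt
  // MulAmt == (2^N - 1) << ShiftAmt  ->  ((X << N) - X) << ShiftAmt
  unsigned Inner = PlusOne ? (X << N) + X : (X << N) - X;
  return Inner << ShiftAmt;
}
// For example, X * 10 == X * ((4 + 1) << 1) becomes ((X << 2) + X) << 1.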
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VbicVT;
SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VbicVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
}
}
}
if (!Subtarget->isThumb1Only()) {
// fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
return Result;
}
return SDValue();
}
// Try combining OR nodes to SMULWB, SMULWT.
static SDValue PerformORCombineToSMULWBT(SDNode *OR,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasV6Ops() ||
(Subtarget->isThumb() &&
(!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
return SDValue();
SDValue SRL = OR->getOperand(0);
SDValue SHL = OR->getOperand(1);
if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
SRL = OR->getOperand(1);
SHL = OR->getOperand(0);
}
if (!isSRL16(SRL) || !isSHL16(SHL))
return SDValue();
// The first operands to the shifts need to be the two results from the
// same smul_lohi node.
if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
return SDValue();
SDNode *SMULLOHI = SRL.getOperand(0).getNode();
if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
SHL.getOperand(0) != SDValue(SMULLOHI, 1))
return SDValue();
// Now we have:
// (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
// For SMULW[B|T], smul_lohi takes a 32-bit and a 16-bit argument.
// For SMULWB the 16-bit value will be sign-extended in some way.
// For SMULWT only the SRA is required.
// Check both sides of SMUL_LOHI.
SDValue OpS16 = SMULLOHI->getOperand(0);
SDValue OpS32 = SMULLOHI->getOperand(1);
SelectionDAG &DAG = DCI.DAG;
if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
OpS16 = OpS32;
OpS32 = SMULLOHI->getOperand(0);
}
SDLoc dl(OR);
unsigned Opcode = 0;
if (isS16(OpS16, DAG))
Opcode = ARMISD::SMULWB;
else if (isSRA16(OpS16)) {
Opcode = ARMISD::SMULWT;
OpS16 = OpS16->getOperand(0);
}
else
return SDValue();
SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
return SDValue(OR, 0);
}
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VORR
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN && Subtarget->hasNEON() &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VorrVT, VT.is128BitVector(),
OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
}
}
}
if (!Subtarget->isThumb1Only()) {
// fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
return Result;
}
// The code below optimizes (or (and X, Y), Z).
// The AND operand needs to have a single user to make these optimizations
// profitable.
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
SDValue N1 = N->getOperand(1);
// (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
APInt SplatBits0, SplatBits1;
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
// Ensure that the second operand of both ands are constants
if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
HasAnyUndefs) && !HasAnyUndefs) {
// Ensure that the bit width of the constants are the same and that
// the splat arguments are logical inverses as per the pattern we
// are trying to simplify.
if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
SplatBits0 == ~SplatBits1) {
// Canonicalize the vector type to make instruction selection
// simpler.
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
N0->getOperand(1),
N0->getOperand(0),
N1->getOperand(0));
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
}
}
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
// BFI is only available on V6T2+
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & ~mask) == val, i.e., val does not overlap the preserved bits
//
// 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
// 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
// && mask == ~mask2
// 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
// && ~mask == mask2
// (i.e., copy a bitfield value into another bitfield of the same width)
if (VT != MVT::i32)
return SDValue();
SDValue N00 = N0.getOperand(0);
// The value and the mask need to be constants so we can verify this is
// actually a bitfield set. If the mask is 0xffff, we can do better
// via a movt instruction, so don't use BFI in that case.
SDValue MaskOp = N0.getOperand(1);
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
if (!MaskC)
return SDValue();
unsigned Mask = MaskC->getZExtValue();
if (Mask == 0xffff)
return SDValue();
SDValue Res;
// Case (1): or (and A, mask), val => ARMbfi A, val, mask
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C) {
unsigned Val = N1C->getZExtValue();
if ((Val & ~Mask) != Val)
return SDValue();
if (ARM::isBitFieldInvertedMask(Mask)) {
Val >>= countTrailingZeros(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
DAG.getConstant(Val, DL, MVT::i32),
DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
} else if (N1.getOpcode() == ISD::AND) {
// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
unsigned Mask2 = N11C->getZExtValue();
// Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
// as is to match.
if (ARM::isBitFieldInvertedMask(Mask) &&
(Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasDSP() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
unsigned amt = countTrailingZeros(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(amt, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasDSP() &&
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
unsigned lsb = countTrailingZeros(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
DAG.getConstant(Mask2, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
}
if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
ARM::isBitFieldInvertedMask(~Mask)) {
// Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
// where lsb(mask) == #shamt and masked bits of B are known zero.
SDValue ShAmt = N00.getOperand(1);
unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
unsigned LSB = countTrailingZeros(Mask);
if (ShAmtC != LSB)
return SDValue();
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
DAG.getConstant(~Mask, DL, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
}
return SDValue();
}
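// Illustrative sketch (not part of the upstream source): a scalar model of
// what ARMISD::BFI computes in case (1) above. The node's third operand holds
// the bits *preserved* from the first operand, so the written field is ~Mask,
// and the combine pre-shifts Val down so its field starts at bit 0. Assumes
// ~Mask is nonzero and a GCC/Clang-style __builtin_ctz.
static inline unsigned BFIScalarModel(unsigned A, unsigned Val, unsigned Mask) {
  unsigned Lsb = __builtin_ctz(~Mask);         // position of the inserted field
  return (A & Mask) | ((Val << Lsb) & ~Mask);  // keep A's Mask bits, insert Val
}
// E.g. A == 0xAABBCCDD, Mask == 0xFFFF00FF, Val == 0xEE yields 0xAABBEEDD.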
static SDValue PerformXORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (!Subtarget->isThumb1Only()) {
// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
}
return SDValue();
}
// ParseBFI - Given a BFI instruction in N, extract the "from" value (Rn) and
// return it, and fill in FromMask and ToMask with the (consecutive) bits in
// "from" to be extracted and their positions in "to" (Rd).
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
assert(N->getOpcode() == ARMISD::BFI);
SDValue From = N->getOperand(1);
ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
// If the Base came from a SHR #C, we can deduce that it is really testing bit
// #C in the base of the SHR.
if (From->getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(From->getOperand(1))) {
APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
assert(Shift.getLimitedValue() < 32 && "Shift too large!");
FromMask <<= Shift.getLimitedValue(31);
From = From->getOperand(0);
}
return From;
}
// Given that A and B each contain a single contiguous run of set bits, does
// A | B form their concatenation A . B (i.e., B's run sits immediately below
// A's)?
//
// Neither A nor B may be zero.
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
unsigned LastActiveBitInA = A.countTrailingZeros();
unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
return LastActiveBitInA - 1 == FirstActiveBitInB;
}
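// Worked example (illustrative, not part of the upstream source): the check
// above asks whether B's highest set bit sits immediately below A's lowest
// set bit. With A == 0b111000 and B == 0b000111, LastActiveBitInA == 3 and
// FirstActiveBitInB == 2, so 3 - 1 == 2 and the two fields concatenate with
// no gap and no overlap. With B == 0b000011, bit 2 would be left unset and
// the check fails.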
static SDValue FindBFIToCombineWith(SDNode *N) {
// We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can
// combine with, if one exists.
APInt ToMask, FromMask;
SDValue From = ParseBFI(N, ToMask, FromMask);
SDValue To = N->getOperand(0);
// Now check for a compatible BFI to merge with. We can pass through BFIs that
// aren't compatible, but not if they set the same bit in their destination as
// we do (or that of any BFI we're going to combine with).
SDValue V = To;
APInt CombinedToMask = ToMask;
while (V.getOpcode() == ARMISD::BFI) {
APInt NewToMask, NewFromMask;
SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
if (NewFrom != From) {
// This BFI has a different base. Keep going.
CombinedToMask |= NewToMask;
V = V.getOperand(0);
continue;
}
// Do the written bits conflict with any we've seen so far?
if ((NewToMask & CombinedToMask).getBoolValue())
// Conflicting bits - bail out because going further is unsafe.
return SDValue();
// Are the new bits contiguous when combined with the old bits?
if (BitsProperlyConcatenate(ToMask, NewToMask) &&
BitsProperlyConcatenate(FromMask, NewFromMask))
return V;
if (BitsProperlyConcatenate(NewToMask, ToMask) &&
BitsProperlyConcatenate(NewFromMask, FromMask))
return V;
// We've seen a write to some bits, so track it.
CombinedToMask |= NewToMask;
// Keep going...
V = V.getOperand(0);
}
return SDValue();
}
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::AND) {
// (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
// the bits being cleared by the AND are not demanded by the BFI.
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
unsigned LSB = countTrailingZeros(~InvMask);
unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
assert(Width <
static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
"undefined behavior");
unsigned Mask = (1u << Width) - 1;
unsigned Mask2 = N11C->getZExtValue();
if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
} else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
// We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
// Keep track of any consecutive bits set that all come from the same base
// value. We can combine these together into a single BFI.
SDValue CombineBFI = FindBFIToCombineWith(N);
if (CombineBFI == SDValue())
return SDValue();
// We've found a BFI.
APInt ToMask1, FromMask1;
SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
APInt ToMask2, FromMask2;
SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
assert(From1 == From2);
(void)From2;
// First, unlink CombineBFI.
DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
// Then create a new BFI, combining the two together.
APInt NewFromMask = FromMask1 | FromMask2;
APInt NewToMask = ToMask1 | ToMask2;
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (NewFromMask[0] == 0)
From1 = DCI.DAG.getNode(
ISD::SRL, dl, VT, From1,
DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
DCI.DAG.getConstant(~NewToMask, dl, VT));
}
return SDValue();
}
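// Worked example for the BFI chain merge above (illustrative, not part of the
// upstream source). Given the chain
//   t1 = BFI(base, from,         ~0x000000F0)   ; from[3:0] -> bits 7:4
//   t2 = BFI(t1,   srl(from, 4), ~0x00000F00)   ; from[7:4] -> bits 11:8
// ParseBFI reports ToMask/FromMask pairs (0xF0, 0x0F) and (0xF00, 0xF0); both
// pairs concatenate properly, so the chain collapses to the single node
//   BFI(base, from, ~0x00000FF0)                ; from[7:0] -> bits 11:4
// and no extra SRL is needed because NewFromMask already starts at bit 0.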
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
SDNode *InNode = InDouble.getNode();
if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
InNode->getValueType(0) == MVT::f64 &&
InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
!cast<LoadSDNode>(InNode)->isVolatile()) {
// TODO: Should this be done for non-FrameIndex operands?
LoadSDNode *LD = cast<LoadSDNode>(InNode);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(LD);
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 =
DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
LD->getAlignment(), LD->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
SDValue NewLD2 = DAG.getLoad(
MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
if (DCI.DAG.getDataLayout().isBigEndian())
std::swap(NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
return Result;
}
return SDValue();
}
/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
// N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() == ISD::BITCAST)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::BITCAST)
Op1 = Op1.getOperand(0);
if (Op0.getOpcode() == ARMISD::VMOVRRD &&
Op0.getNode() == Op1.getNode() &&
Op0.getResNo() == 0 && Op1.getResNo() == 1)
return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Op0.getOperand(0));
return SDValue();
}
/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
/// are normal, non-volatile loads. If so, it is profitable to bitcast an
/// i64 vector to have f64 elements, since the value can then be loaded
/// directly into a VFP register.
static bool hasNormalLoadOperand(SDNode *N) {
unsigned NumElts = N->getValueType(0).getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
SDNode *Elt = N->getOperand(i).getNode();
if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
return true;
}
return false;
}
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
// into a pair of GPRs, which is fine when the value is used as a scalar,
// but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
SelectionDAG &DAG = DCI.DAG;
if (N->getNumOperands() == 2)
if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
return RV;
// Load i64 elements as f64 values so that type legalization does not split
// them up into i32 values.
EVT VT = N->getValueType(0);
if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
return SDValue();
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
Ops.push_back(V);
// Make the DAGCombiner fold the bitcast.
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static SDValue
PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
// At that time, we may have inserted bitcasts from integer to float.
// If these bitcasts have survived DAGCombine, change the lowering of this
// BUILD_VECTOR in something more vector friendly, i.e., that does not
// force to use floating point types.
// Make sure we can change the type of the vector.
// This is possible iff:
// 1. The vector is only used in a bitcast to a integer type. I.e.,
// 1.1. Vector is used only once.
// 1.2. Use is a bit convert to an integer type.
// 2. The size of its operands are 32-bits (64-bits are not legal).
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
// Check 1.1. and 2.
if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
return SDValue();
// By construction, the input type must be float.
assert(EltVT == MVT::f32 && "Unexpected type!");
// Check 1.2.
SDNode *Use = *N->use_begin();
if (Use->getOpcode() != ISD::BITCAST ||
Use->getValueType(0).isFloatingPoint())
return SDValue();
// Check profitability.
// Model is, if more than half of the relevant operands are bitcast from
// i32, turn the build_vector into a sequence of insert_vector_elt.
// Relevant operands are everything that is not statically
// (i.e., at compile time) bitcasted.
unsigned NumOfBitCastedElts = 0;
unsigned NumElts = VT.getVectorNumElements();
unsigned NumOfRelevantElts = NumElts;
for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
SDValue Elt = N->getOperand(Idx);
if (Elt->getOpcode() == ISD::BITCAST) {
// Assume only bit cast to i32 will go away.
if (Elt->getOperand(0).getValueType() == MVT::i32)
++NumOfBitCastedElts;
} else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
// Constants are statically casted, thus do not count them as
// relevant operands.
--NumOfRelevantElts;
}
// Check if more than half of the elements require a non-free bitcast.
if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
// Create the new vector type.
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
// Check if the type is legal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(VecVT))
return SDValue();
// Combine:
// ARMISD::BUILD_VECTOR E1, E2, ..., EN.
// => BITCAST INSERT_VECTOR_ELT
// (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
// (BITCAST EN), N.
SDValue Vec = DAG.getUNDEF(VecVT);
SDLoc dl(N);
for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
SDValue V = N->getOperand(Idx);
if (V.isUndef())
continue;
if (V.getOpcode() == ISD::BITCAST &&
V->getOperand(0).getValueType() == MVT::i32)
// Fold obvious case.
V = V.getOperand(0);
else {
V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
}
Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
return Vec;
}
/// PerformInsertEltCombine - Target-specific dag combine xforms for
/// ISD::INSERT_VECTOR_ELT.
static SDValue PerformInsertEltCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Bitcast an i64 load inserted into a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
EVT VT = N->getValueType(0);
SDNode *Elt = N->getOperand(1).getNode();
if (VT.getVectorElementType() != MVT::i64 ||
!ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
VT.getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
DCI.AddToWorklist(V.getNode());
SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
Vec, V, N->getOperand(2));
return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
// The LLVM shufflevector instruction does not require the shuffle mask
// length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
// have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
// operands do not match the mask length, they are extended by concatenating
// them with undef vectors. That is probably the right thing for other
// targets, but for NEON it is better to concatenate two double-register
// size vector operands into a single quad-register size vector. Do that
// transformation here:
// shuffle(concat(v1, undef), concat(v2, undef)) ->
// shuffle(concat(v1, v2), undef)
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
Op1.getOpcode() != ISD::CONCAT_VECTORS ||
Op0.getNumOperands() != 2 ||
Op1.getNumOperands() != 2)
return SDValue();
SDValue Concat0Op1 = Op0.getOperand(1);
SDValue Concat1Op1 = Op1.getOperand(1);
if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
return SDValue();
// Skip the transformation if any of the types are illegal.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
if (!TLI.isTypeLegal(VT) ||
!TLI.isTypeLegal(Concat0Op1.getValueType()) ||
!TLI.isTypeLegal(Concat1Op1.getValueType()))
return SDValue();
SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
Op0.getOperand(0), Op1.getOperand(0));
// Translate the shuffle mask.
SmallVector<int, 16> NewMask;
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfElts = NumElts/2;
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned n = 0; n < NumElts; ++n) {
int MaskElt = SVN->getMaskElt(n);
int NewElt = -1;
if (MaskElt < (int)HalfElts)
NewElt = MaskElt;
else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
NewElt = HalfElts + MaskElt - NumElts;
NewMask.push_back(NewElt);
}
return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
DAG.getUNDEF(VT), NewMask);
}
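// Illustrative sketch (not part of the upstream source): the mask remapping
// above as a standalone helper (the name is ours). Lanes that read v1 keep
// their index, lanes that read v2 move into the top half of the new single
// concat, and lanes that read the undef padding become -1 (undef).
static inline int RemapShuffleLane(int MaskElt, unsigned NumElts) {
  unsigned HalfElts = NumElts / 2;
  if (MaskElt >= 0 && MaskElt < (int)HalfElts)
    return MaskElt;                              // still reads v1
  if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
    return (int)(HalfElts + MaskElt - NumElts);  // reads v2's new position
  return -1;                                     // read an undef half
}
// E.g. with NumElts == 4, the mask <0, 1, 4, 5> becomes <0, 1, 2, 3>.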
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
/// For generic load/stores, the memory type is assumed to be a vector.
/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
const bool isStore = N->getOpcode() == ISD::STORE;
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
MemSDNode *MemN = cast<MemSDNode>(N);
SDLoc dl(N);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
continue;
// Find the new opcode for the updating load/store.
bool isLoadOp = true;
bool isLaneOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
if (isIntrinsic) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; break;
case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
NumVecs = 2; break;
case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
NumVecs = 3; break;
case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
NumVecs = 4; break;
case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
NumVecs = 2; isLaneOp = true; break;
case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
NumVecs = 3; isLaneOp = true; break;
case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
NumVecs = 4; isLaneOp = true; break;
case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
NumVecs = 1; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
NumVecs = 2; isLoadOp = false; break;
case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
NumVecs = 3; isLoadOp = false; break;
case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
NumVecs = 4; isLoadOp = false; break;
case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
}
} else {
isLaneOp = true;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode for Neon base update");
case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; isLaneOp = false; break;
case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
}
}
// Find the size of memory referenced by the load/store.
EVT VecTy;
if (isLoadOp) {
VecTy = N->getValueType(0);
} else if (isIntrinsic) {
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
} else {
assert(isStore && "Node has to be a load, a store, or an intrinsic!");
VecTy = N->getOperand(1).getValueType();
}
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
// VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
// separate instructions that make it harder to use a non-constant update.
continue;
}
// OK, we found an ADD we can fold into the base update.
// Now, create a _UPD node, taking care of not breaking alignment.
EVT AlignedVecTy = VecTy;
unsigned Alignment = MemN->getAlignment();
// If this is a less-than-standard-aligned load/store, change the type to
// match the standard alignment.
// The alignment is overlooked when selecting _UPD variants; and it's
// easier to introduce bitcasts here than fix that.
// There are 3 ways to get to this base-update combine:
// - intrinsics: they are assumed to be properly aligned (to the standard
// alignment of the memory type), so we don't need to do anything.
// - ARMISD::VLDx nodes: they are only generated from the aforementioned
// intrinsics, so, likewise, there's nothing to do.
// - generic load/store instructions: the alignment is specified as an
// explicit operand, rather than implicitly as the standard alignment
// of the memory type (like the intrisics). We need to change the
// memory type to match the explicit alignment. That way, we don't
// generate non-standard-aligned ARMISD::VLDx nodes.
if (isa<LSBaseSDNode>(N)) {
if (Alignment == 0)
Alignment = 1;
if (Alignment < VecTy.getScalarSizeInBits() / 8) {
MVT EltTy = MVT::getIntegerVT(Alignment * 8);
assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
assert(!isLaneOp && "Unexpected generic load/store lane.");
unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
}
// Don't set an explicit alignment on regular load/stores that we want
// to transform to VLD/VST 1_UPD nodes.
// This matches the behavior of regular load/stores, which only get an
// explicit alignment if the MMO alignment is larger than the standard
// alignment of the memory type.
// Intrinsics, however, always get an explicit alignment, set to the
// alignment of the MMO.
Alignment = 1;
}
// Create the new updating load/store node.
// First, create an SDVTList for the new updating node's results.
EVT Tys[6];
unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = AlignedVecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
// Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
// Try to match the intrinsic's signature
Ops.push_back(StN->getValue());
} else {
// Loads (and of course intrinsics) match the intrinsics' signature,
// so just add all but the alignment operand.
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
Ops.push_back(N->getOperand(i));
}
// For all node types, the alignment operand is always the last one.
Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
// If this is a non-standard-aligned STORE, the penultimate operand is the
// stored value. Bitcast it to the aligned type.
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
SDValue &StVal = Ops[Ops.size()-2];
StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
}
EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
MemN->getMemOperand());
// Update the uses.
SmallVector<SDValue, 5> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i)
NewResults.push_back(SDValue(UpdN.getNode(), i));
// If this is a non-standard-aligned LOAD, the first result is the loaded
// value. Bitcast it to the expected result type.
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
SDValue &LdVal = NewResults[0];
LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
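// Worked example for the sizing logic above (illustrative, not part of the
// upstream source): a vld2.32 of two v2i32 vectors has NumVecs == 2 and
// VecTy.getSizeInBits() == 64, so NumBytes == 2 * 64 / 8 == 16, and an ADD of
// the address by 16 folds into the post-incremented form
//   vld2.32 {d0, d1}, [r0]!
// For a lane op, NumBytes is further divided by the lane count, since only
// one element per vector is transferred.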
static SDValue PerformVLDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
return CombineBaseUpdate(N, DCI);
}
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
/// return true.
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
// vldN-dup instructions only support 64-bit vectors for N > 1.
if (!VT.is64BitVector())
return false;
// Check if the VDUPLANE operand is a vldN-dup intrinsic.
SDNode *VLD = N->getOperand(0).getNode();
if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
return false;
unsigned NumVecs = 0;
unsigned NewOpc = 0;
unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
if (IntNo == Intrinsic::arm_neon_vld2lane) {
NumVecs = 2;
NewOpc = ARMISD::VLD2DUP;
} else if (IntNo == Intrinsic::arm_neon_vld3lane) {
NumVecs = 3;
NewOpc = ARMISD::VLD3DUP;
} else if (IntNo == Intrinsic::arm_neon_vld4lane) {
NumVecs = 4;
NewOpc = ARMISD::VLD4DUP;
} else {
return false;
}
// First check that all the vldN-lane uses are VDUPLANEs and that the lane
// numbers match the load.
unsigned VLDLaneNo =
cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
// Ignore uses of the chain result.
if (UI.getUse().getResNo() == NumVecs)
continue;
SDNode *User = *UI;
if (User->getOpcode() != ARMISD::VDUPLANE ||
VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
return false;
}
// Create the vldN-dup node.
EVT Tys[5];
unsigned n;
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
Ops, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
// Update the uses.
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
UI != UE; ++UI) {
unsigned ResNo = UI.getUse().getResNo();
// Ignore uses of the chain result.
if (ResNo == NumVecs)
continue;
SDNode *User = *UI;
DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
}
// Now the vldN-lane intrinsic is dead except for its chain result.
// Update uses of the chain.
std::vector<SDValue> VLDDupResults;
for (unsigned n = 0; n < NumVecs; ++n)
VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
DCI.CombineTo(VLD, VLDDupResults);
return true;
}
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Op = N->getOperand(0);
// If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
// of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
if (CombineVLDDUP(N, DCI))
return SDValue(N, 0);
// If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
// redundant. Ignore bit_converts for now; element sizes are checked below.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
return SDValue();
// Make sure the VMOV element size is not bigger than the VDUPLANE elements.
unsigned EltSize = Op.getScalarValueSizeInBits();
// The canonical VMOV for a zero vector uses a 32-bit element size.
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned EltBits;
if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
EltSize = 8;
EVT VT = N->getValueType(0);
if (EltSize > VT.getScalarSizeInBits())
return SDValue();
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformVDUPCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue Op = N->getOperand(0);
// Match VDUP(LOAD) -> VLD1DUP.
// We match this pattern here rather than waiting for isel because the
// transform is only legal for unindexed loads.
LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
if (LD && Op.hasOneUse() && LD->isUnindexed() &&
LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
Ops, LD->getMemoryVT(),
LD->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
return VLDDup;
}
return SDValue();
}
static SDValue PerformLOADCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
// If this is a legal vector load, try to combine it into a VLD1_UPD.
if (ISD::isNormalLoad(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
return SDValue();
}
/// PerformSTORECombine - Target-specific dag combine xforms for
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
StoreSDNode *St = cast<StoreSDNode>(N);
if (St->isVolatile())
return SDValue();
// Optimize trunc store (of multiple scalars) to shuffle and store. First,
// pack all of the elements in one place. Next, store to memory in fewer
// chunks.
SDValue StVal = St->getValue();
EVT VT = StVal.getValueType();
if (St->isTruncatingStore() && VT.isVector()) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT StVT = St->getMemoryVT();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromEltSz = VT.getScalarSizeInBits();
unsigned ToEltSz = StVT.getScalarSizeInBits();
// The From and To element sizes and the element count must be powers of two.
if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
unsigned SizeRatio = FromEltSz / ToEltSz;
assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i < NumElems; ++i)
ShuffleVec[i] = DAG.getDataLayout().isBigEndian()
? (i + 1) * SizeRatio - 1
: i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
DAG.getUNDEF(WideVec.getValueType()),
ShuffleVec);
// At this point all of the data is stored at the bottom of the
// register. We now need to store it to memory.
// Find the largest legal store unit.
MVT StoreType = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
StoreType = Tp;
}
// Didn't find a legal store type.
if (!TLI.isTypeLegal(StoreType))
return SDValue();
// Bitcast the original vector into a vector of store-size units
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue BasePtr = St->getBasePtr();
// Perform one or more big stores into memory.
unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
for (unsigned I = 0; I < E; I++) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
StoreType, ShuffWide,
DAG.getIntPtrConstant(I, DL));
SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
Increment);
Chains.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
if (!ISD::isNormalStore(St))
return SDValue();
// Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
// ARM stores of arguments in the same cache line.
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
bool isBigEndian = DAG.getDataLayout().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(
St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
BasePtr, St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
return DAG.getStore(NewST1.getValue(0), DL,
StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
OffsetPtr, St->getPointerInfo(),
std::min(4U, St->getAlignment() / 2),
St->getMemOperand()->getFlags());
}
if (StVal.getValueType() == MVT::i64 &&
StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// Bitcast an i64 store extracted from a vector to f64.
// Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(StVal);
SDValue IntVec = StVal.getOperand(0);
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
IntVec.getValueType().getVectorNumElements());
SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
Vec, StVal.getOperand(1));
dl = SDLoc(N);
SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(Vec.getNode());
DCI.AddToWorklist(ExtElt.getNode());
DCI.AddToWorklist(V.getNode());
return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags(), St->getAAInfo());
}
// If this is a legal vector store, try to combine it into a VST1_UPD.
if (ISD::isNormalStore(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
return SDValue();
}
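// Worked example for the truncating-store path above (illustrative, not part
// of the upstream source): truncating a v4i32 store to v4i16 gives
// FromEltSz == 32, ToEltSz == 16, SizeRatio == 2. The value is bitcast to
// v8i16 and shuffled with mask <0, 2, 4, 6, ...> so the four kept halfwords
// (the low half of each i32 on little-endian) are packed at the bottom of the
// register; the packed 64 bits are then written with a single i64 store
// instead of four separate halfword stores.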
/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
/// vmul.f32 d16, d17, d16
/// vcvt.s32.f32 d16, d16
/// becomes:
/// vcvt.s32.f32 d16, d16, #3
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
Op.getOpcode() != ISD::FMUL)
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
// be lossy. We also can't handle more than 4 lanes, since these instructions
// only support v2i32/v4i32 types.
return SDValue();
}
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
if (C == -1 || C == 0 || C > 32)
return SDValue();
SDLoc dl(N);
bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
SDValue FixConv = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
DAG.getConstant(C, dl, MVT::i32));
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
return FixConv;
}
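// Illustrative sketch (not part of the upstream source): the combine rests on
// the identity (int)(x * 2^C) == vcvt(x, #C), i.e. a fixed-point convert with
// C fraction bits. A scalar model (the sketch assumes C < 32; the combine
// itself allows up to 32):
static inline int FixedPointConvertModel(float X, unsigned C) {
  // vcvt.s32.f32 d, d, #C scales by 2^C before truncating toward zero.
  return (int)(X * (float)(1u << C));
}
// E.g. X == 2.5f and C == 3 gives (int)(2.5f * 8.0f) == 20.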
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
/// vcvt.f32.s32 d16, d16
/// vdiv.f32 d16, d17, d16
/// becomes:
/// vcvt.f32.s32 d16, d16, #3
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from i32 to f32. We can handle
// smaller integers by generating an extra extend, but larger ones would
// be lossy. We also can't handle more than 4 lanes, since these instructions
// only support v2i32/v4i32 types.
return SDValue();
}
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
if (C == -1 || C == 0 || C > 32)
return SDValue();
SDLoc dl(N);
bool isSigned = OpOpcode == ISD::SINT_TO_FP;
SDValue ConvInput = Op.getOperand(0);
if (IntBits < FloatBits)
ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
ConvInput);
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
Intrinsic::arm_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. For a shift opcode, the value
/// is positive, but for an intrinsic the count must be negative. The
/// absolute value must be in the range:
/// 1 <= |Value| <= ElementBits for a right shift; or
/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
if (!isIntrinsic)
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
Cnt = -Cnt;
return true;
}
return false;
}
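// Illustrative ranges (not part of the upstream source) for v4i16 lanes,
// where ElementBits == 16:
//   plain left shift:      0 <= Cnt <= 15   (vshl.i16 #0..#15)
//   long left shift:       0 <= Cnt <= 16   (vshll.s16 allows #16)
//   right shift:           1 <= Cnt <= 16   (vshr.s16 #1..#16)
//   narrowing right shift: 1 <= Cnt <= 8    (vshrn.i16 #1..#8)
// Intrinsic call sites encode right-shift counts as negative values, which
// isVShiftRImm flips back to the positive immediate.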
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
// Don't do anything for most intrinsics.
break;
// Vector shifts: check for immediate versions and lower them.
// Note: This is done during DAG combining instead of DAG legalizing because
// the build_vectors for 64-bit vector element shift counts are generally
// not legal, and it is hard to see their values after they get legalized to
// loads from a constant pool.
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
case Intrinsic::arm_neon_vrshiftn:
case Intrinsic::arm_neon_vqshifts:
case Intrinsic::arm_neon_vqshiftu:
case Intrinsic::arm_neon_vqshiftsu:
case Intrinsic::arm_neon_vqshiftns:
case Intrinsic::arm_neon_vqshiftnu:
case Intrinsic::arm_neon_vqshiftnsu:
case Intrinsic::arm_neon_vqrshiftns:
case Intrinsic::arm_neon_vqrshiftnu:
case Intrinsic::arm_neon_vqrshiftnsu: {
EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
unsigned VShiftOpc = 0;
switch (IntNo) {
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
VShiftOpc = ARMISD::VSHL;
break;
}
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
ARMISD::VSHRs : ARMISD::VSHRu);
break;
}
return SDValue();
case Intrinsic::arm_neon_vrshifts:
case Intrinsic::arm_neon_vrshiftu:
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
break;
return SDValue();
case Intrinsic::arm_neon_vqshifts:
case Intrinsic::arm_neon_vqshiftu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
break;
return SDValue();
case Intrinsic::arm_neon_vqshiftsu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
break;
llvm_unreachable("invalid shift count for vqshlu intrinsic");
case Intrinsic::arm_neon_vrshiftn:
case Intrinsic::arm_neon_vqshiftns:
case Intrinsic::arm_neon_vqshiftnu:
case Intrinsic::arm_neon_vqshiftnsu:
case Intrinsic::arm_neon_vqrshiftns:
case Intrinsic::arm_neon_vqrshiftnu:
case Intrinsic::arm_neon_vqrshiftnsu:
// Narrowing shifts require an immediate right shift.
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
break;
llvm_unreachable("invalid shift count for narrowing vector shift "
"intrinsic");
default:
llvm_unreachable("unhandled vector shift");
}
switch (IntNo) {
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
// Opcode already set above.
break;
case Intrinsic::arm_neon_vrshifts:
VShiftOpc = ARMISD::VRSHRs; break;
case Intrinsic::arm_neon_vrshiftu:
VShiftOpc = ARMISD::VRSHRu; break;
case Intrinsic::arm_neon_vrshiftn:
VShiftOpc = ARMISD::VRSHRN; break;
case Intrinsic::arm_neon_vqshifts:
VShiftOpc = ARMISD::VQSHLs; break;
case Intrinsic::arm_neon_vqshiftu:
VShiftOpc = ARMISD::VQSHLu; break;
case Intrinsic::arm_neon_vqshiftsu:
VShiftOpc = ARMISD::VQSHLsu; break;
case Intrinsic::arm_neon_vqshiftns:
VShiftOpc = ARMISD::VQSHRNs; break;
case Intrinsic::arm_neon_vqshiftnu:
VShiftOpc = ARMISD::VQSHRNu; break;
case Intrinsic::arm_neon_vqshiftnsu:
VShiftOpc = ARMISD::VQSHRNsu; break;
case Intrinsic::arm_neon_vqrshiftns:
VShiftOpc = ARMISD::VQRSHRNs; break;
case Intrinsic::arm_neon_vqrshiftnu:
VShiftOpc = ARMISD::VQRSHRNu; break;
case Intrinsic::arm_neon_vqrshiftnsu:
VShiftOpc = ARMISD::VQRSHRNsu; break;
}
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
}
case Intrinsic::arm_neon_vshiftins: {
EVT VT = N->getOperand(1).getValueType();
int64_t Cnt;
unsigned VShiftOpc = 0;
if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
VShiftOpc = ARMISD::VSLI;
else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
VShiftOpc = ARMISD::VSRI;
else {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
N->getOperand(1), N->getOperand(2),
DAG.getConstant(Cnt, dl, MVT::i32));
}
case Intrinsic::arm_neon_vqrshifts:
case Intrinsic::arm_neon_vqrshiftu:
// No immediate versions of these to check for.
break;
}
return SDValue();
}
/// PerformShiftCombine - Checks for immediate versions of vector shifts and
/// lowers them. As with the vector shift intrinsics, this is done during DAG
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
// Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
// 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
SDValue N1 = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
SDValue N0 = N->getOperand(0);
if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
DAG.MaskedValueIsZero(N0.getOperand(0),
APInt::getHighBitsSet(32, 16)))
return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
}
}
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
SDLoc dl(N);
return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
}
break;
case ISD::SRA:
case ISD::SRL:
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
ARMISD::VSHRs : ARMISD::VSHRu);
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
}
}
return SDValue();
}
/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
SDValue N0 = N->getOperand(0);
// Check for sign- and zero-extensions of vector extract operations of 8-
// and 16-bit vector elements. NEON supports these directly. They are
// handled during DAG combining because type legalization will promote them
// to 32-bit types and it is messy to recognize the operations after that.
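// e.g. (sext (extract_vector_elt v8i16:d0, 3)) becomes a VGETLANEs node,
// which should select to a single lane move such as "vmov.s16 r0, d0[3]"
// rather than a lane move followed by a separate sign extension.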
if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue Vec = N0.getOperand(0);
SDValue Lane = N0.getOperand(1);
EVT VT = N->getValueType(0);
EVT EltVT = N0.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (VT == MVT::i32 &&
(EltVT == MVT::i8 || EltVT == MVT::i16) &&
TLI.isTypeLegal(Vec.getValueType()) &&
isa<ConstantSDNode>(Lane)) {
unsigned Opc = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode");
case ISD::SIGN_EXTEND:
Opc = ARMISD::VGETLANEs;
break;
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Opc = ARMISD::VGETLANEu;
break;
}
return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
}
}
return SDValue();
}
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
// y |= CM;
//
// And:
// * CN is a single bit;
// * All bits covered by CM are known zero in y
//
// Then we can convert this into a sequence of BFI instructions. This will
// always be a win if CM is a single bit, will always be no worse than the
// TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
// three bits (due to the extra IT instruction).
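// e.g. with CN = 0x4 and CM = 0x300, X is first shifted right by 2 so the
// tested bit lands in bit 0, and two single-bit BFIs then insert it at
// bits 8 and 9 of y, both of which must already be known zero.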
SDValue Op0 = CMOV->getOperand(0);
SDValue Op1 = CMOV->getOperand(1);
auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
auto CC = CCNode->getAPIntValue().getLimitedValue();
SDValue CmpZ = CMOV->getOperand(4);
// The compare must be against zero.
if (!isNullConstant(CmpZ->getOperand(1)))
return SDValue();
assert(CmpZ->getOpcode() == ARMISD::CMPZ);
SDValue And = CmpZ->getOperand(0);
if (And->getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
if (!AndC || !AndC->getAPIntValue().isPowerOf2())
return SDValue();
SDValue X = And->getOperand(0);
if (CC == ARMCC::EQ) {
// We're performing an "equal to zero" compare. Swap the operands so we
// canonicalize on a "not equal to zero" compare.
std::swap(Op0, Op1);
} else {
assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
if (Op1->getOpcode() != ISD::OR)
return SDValue();
ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
if (!OrC)
return SDValue();
SDValue Y = Op1->getOperand(0);
if (Op0 != Y)
return SDValue();
// Now, is it profitable to continue?
APInt OrCI = OrC->getAPIntValue();
unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
if (OrCI.countPopulation() > Heuristic)
return SDValue();
// Lastly, can we determine that the bits defined by OrCI
// are zero in Y?
KnownBits Known;
DAG.computeKnownBits(Y, Known);
if ((OrCI & Known.Zero) != OrCI)
return SDValue();
// OK, we can do the combine.
SDValue V = Y;
SDLoc dl(X);
EVT VT = X.getValueType();
unsigned BitInX = AndC->getAPIntValue().logBase2();
if (BitInX != 0) {
// We must shift X first.
X = DAG.getNode(ISD::SRL, dl, VT, X,
DAG.getConstant(BitInX, dl, VT));
}
for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
BitInY < NumActiveBits; ++BitInY) {
if (OrCI[BitInY] == 0)
continue;
APInt Mask(VT.getSizeInBits(), 0);
Mask.setBit(BitInY);
V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
// Confusingly, the operand is an *inverted* mask.
DAG.getConstant(~Mask, dl, VT));
}
return V;
}
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
SDValue
ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue Cmp = N->getOperand(4);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at NE cases.
return SDValue();
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue Chain = N->getOperand(0);
SDValue BB = N->getOperand(1);
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
// (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
// -> (brcond Chain BB CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
LHS->getOperand(0)->hasOneUse()) {
auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
if ((LHS00C && LHS00C->getZExtValue() == 0) &&
(LHS01C && LHS01C->getZExtValue() == 1) &&
(LHS1C && LHS1C->getZExtValue() == 1) &&
(RHSC && RHSC->getZExtValue() == 0)) {
return DAG.getNode(
ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
}
}
return SDValue();
}
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue Cmp = N->getOperand(4);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at EQ and NE cases.
return SDValue();
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
SDValue FalseVal = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
// BFI is only available on V6T2+.
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
SDValue R = PerformCMOVToBFICombine(N, DAG);
if (R)
return R;
}
// Simplify
// mov r1, r0
// cmp r1, x
// mov r0, y
// moveq r0, x
// to
// cmp r0, x
// movne r0, y
//
// mov r1, r0
// cmp r1, x
// mov r0, x
// movne r0, y
// to
// cmp r0, x
// movne r0, y
/// FIXME: Turn this into a target neutral optimization?
SDValue Res;
if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
N->getOperand(3), Cmp);
} else if (CC == ARMCC::EQ && TrueVal == RHS) {
SDValue ARMcc;
SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
N->getOperand(3), NewCmp);
}
// (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
// -> (cmov F T CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
if ((LHS0C && LHS0C->getZExtValue() == 0) &&
(LHS1C && LHS1C->getZExtValue() == 1) &&
(RHSC && RHSC->getZExtValue() == 0)) {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
LHS->getOperand(2), LHS->getOperand(3),
LHS->getOperand(4));
}
}
if (Res.getNode()) {
KnownBits Known;
DAG.computeKnownBits(SDValue(N,0), Known);
// Capture demanded bits information that would be otherwise lost.
if (Known.Zero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i1));
else if (Known.Zero == 0xffffff00)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i8));
else if (Known.Zero == 0xffff0000)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
DAG.getValueType(MVT::i16));
}
return Res;
}
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::ADDC:
case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
case ISD::STORE: return PerformSTORECombine(N, DCI);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
case ISD::FDIV:
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD1DUP:
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ARMISD::SMULWB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
return SDValue();
break;
}
case ARMISD::SMULWT: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
return SDValue();
break;
}
case ARMISD::SMLALBB: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
return SDValue();
break;
}
case ARMISD::SMLALBT: {
unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
return SDValue();
break;
}
case ARMISD::SMLALTB: {
unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
return SDValue();
break;
}
case ARMISD::SMLALTT: {
unsigned BitWidth = N->getValueType(0).getSizeInBits();
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
return SDValue();
break;
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
return PerformVLDCombine(N, DCI);
default: break;
}
break;
}
return SDValue();
}
bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
EVT VT) const {
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
// The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
switch (VT.getSimpleVT().SimpleTy) {
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32: {
// Unaligned accesses can use (for example) LDRB, LDRH and LDR.
if (AllowsUnaligned) {
if (Fast)
*Fast = Subtarget->hasV7Ops();
return true;
}
return false;
}
case MVT::f64:
case MVT::v2f64: {
// For any little-endian target with NEON, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses.
if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
if (Fast)
*Fast = true;
return true;
}
return false;
}
}
}
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
unsigned AlignCheck) {
return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
(DstAlign == 0 || DstAlign % AlignCheck == 0));
}
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
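// e.g. a 16-byte copy with 16-byte-aligned source and destination picks
// MVT::v2f64 below, so the expansion can use Q-register-sized accesses; a
// misaligned copy may still do so if the subtarget reports misaligned
// v2f64 accesses as fast.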
if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
(allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
Fast))) {
return MVT::f64;
}
}
// Let the target-independent logic figure it out.
return MVT::Other;
}
bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
if (Val.getOpcode() != ISD::LOAD)
return false;
EVT VT1 = Val.getValueType();
if (!VT1.isSimple() || !VT1.isInteger() ||
!VT2.isSimple() || !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
default: break;
case MVT::i1:
case MVT::i8:
case MVT::i16:
// 8-bit and 16-bit loads implicitly zero-extend to 32 bits.
return true;
}
return false;
}
bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
EVT VT = ExtVal.getValueType();
if (!isTypeLegal(VT))
return false;
// Don't create a loadext if we can fold the extension into a wide/long
// instruction.
// If there's more than one user instruction, the loadext is desirable no
// matter what. There can be two uses by the same instruction.
if (ExtVal->use_empty() ||
!ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
return true;
SDNode *U = *ExtVal->use_begin();
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
return false;
return true;
}
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
if (!isTypeLegal(EVT::getEVT(Ty1)))
return false;
assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
// Assuming the caller doesn't have a zeroext or signext return parameter,
// truncation all the way down to i1 is valid.
return true;
}
int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
if (isLegalAddressingMode(DL, AM, Ty, AS)) {
if (Subtarget->hasFPAO())
return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
return 0;
}
return -1;
}
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
unsigned Scale = 1;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
// Scale == 1;
break;
case MVT::i16:
// Scale == 2;
Scale = 2;
break;
case MVT::i32:
// Scale == 4;
Scale = 4;
break;
}
if ((V & (Scale - 1)) != 0)
return false;
V /= Scale;
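// The scaled offset must fit in a 5-bit unsigned immediate, so e.g. for
// MVT::i32 the legal offsets are 0, 4, ..., 124.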
return V == (V & ((1LL << 5) - 1));
}
static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
const ARMSubtarget *Subtarget) {
bool isNeg = false;
if (V < 0) {
isNeg = true;
V = - V;
}
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
// + imm12 or - imm8
if (isNeg)
return V == (V & ((1LL << 8) - 1));
return V == (V & ((1LL << 12) - 1));
case MVT::f32:
case MVT::f64:
// Same as ARM mode. FIXME: NEON?
if (!Subtarget->hasVFP2())
return false;
if ((V & 3) != 0)
return false;
V >>= 2;
return V == (V & ((1LL << 8) - 1));
}
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
static bool isLegalAddressImmediate(int64_t V, EVT VT,
const ARMSubtarget *Subtarget) {
if (V == 0)
return true;
if (!VT.isSimple())
return false;
if (Subtarget->isThumb1Only())
return isLegalT1AddressImmediate(V, VT);
else if (Subtarget->isThumb2())
return isLegalT2AddressImmediate(V, VT, Subtarget);
// ARM mode.
if (V < 0)
V = - V;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i32:
// +- imm12
return V == (V & ((1LL << 12) - 1));
case MVT::i16:
// +- imm8
return V == (V & ((1LL << 8) - 1));
case MVT::f32:
case MVT::f64:
if (!Subtarget->hasVFP2()) // FIXME: NEON?
return false;
if ((V & 3) != 0)
return false;
V >>= 2;
return V == (V & ((1LL << 8) - 1));
}
}
bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
EVT VT) const {
int Scale = AM.Scale;
if (Scale < 0)
return false;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
if (Scale == 1)
return true;
// r + r << imm
Scale = Scale & ~1;
return Scale == 2 || Scale == 4 || Scale == 8;
case MVT::i64:
// r + r
if (((unsigned)AM.HasBaseReg + Scale) <= 2)
return true;
return false;
case MVT::isVoid:
// Note, we allow "void" uses (basically, uses that aren't loads or
// stores), because arm allows folding a scale into many arithmetic
// operations. This should be made more precise and revisited later.
// Allow r << imm, but the imm has to be a multiple of two.
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
// Can never fold addr of global into load/store.
if (AM.BaseGV)
return false;
switch (AM.Scale) {
case 0: // no scale reg, must be "r+i" or "r", or "i".
break;
case 1:
if (Subtarget->isThumb1Only())
return false;
LLVM_FALLTHROUGH;
default:
// ARM doesn't support any R+R*scale+imm addr modes.
if (AM.BaseOffs)
return false;
if (!VT.isSimple())
return false;
if (Subtarget->isThumb2())
return isLegalT2ScaledAddressingMode(AM, VT);
int Scale = AM.Scale;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
case MVT::i8:
case MVT::i32:
if (Scale < 0) Scale = -Scale;
if (Scale == 1)
return true;
// r + r << imm
return isPowerOf2_32(Scale & ~1);
case MVT::i16:
case MVT::i64:
// r + r
if (((unsigned)AM.HasBaseReg + Scale) <= 2)
return true;
return false;
case MVT::isVoid:
// Note, we allow "void" uses (basically, uses that aren't loads or
// stores), because arm allows folding a scale into many arithmetic
// operations. This should be made more precise and revisited later.
// Allow r << imm, but the imm has to be a multiple of two.
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
}
return true;
}
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
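// e.g. "cmp r0, #-42" is not encodable, but "cmn r0, #42" sets the same
// flags, which is why std::abs(Imm) is checked below.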
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn and supports only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
/// isLegalAddImmediate - Return true if the specified immediate is a legal add
/// *or sub* immediate, that is the target has add or sub instructions which can
/// add the immediate to a register without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
return ARM_AM::getT2SOImmVal(AbsImm) != -1;
// Thumb1 only has 8-bit unsigned immediate.
return AbsImm >= 0 && AbsImm <= 255;
}
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
return false;
if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
// AddressingMode 3
Base = Ptr->getOperand(0);
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC < 0 && RHSC > -256) {
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
}
}
isInc = (Ptr->getOpcode() == ISD::ADD);
Offset = Ptr->getOperand(1);
return true;
} else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
// AddressingMode 2
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC < 0 && RHSC > -0x1000) {
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
Base = Ptr->getOperand(0);
return true;
}
}
if (Ptr->getOpcode() == ISD::ADD) {
isInc = true;
ARM_AM::ShiftOpc ShOpcVal=
ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
if (ShOpcVal != ARM_AM::no_shift) {
Base = Ptr->getOperand(1);
Offset = Ptr->getOperand(0);
} else {
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
}
return true;
}
isInc = (Ptr->getOpcode() == ISD::ADD);
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
return true;
}
// FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
return false;
}
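// A small example for both helpers: for (add ptr, -8), RHSC = -8 lies in
// the negative immediate range, so Base = ptr, Offset is the constant +8,
// and isInc = false, i.e. an indexed access that decrements the base by 8.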
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
return false;
Base = Ptr->getOperand(0);
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
assert(Ptr->getOpcode() == ISD::ADD);
isInc = false;
Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
} else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
isInc = Ptr->getOpcode() == ISD::ADD;
Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
return true;
}
}
return false;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool
ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
if (Subtarget->isThumb1Only())
return false;
EVT VT;
SDValue Ptr;
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
} else
return false;
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
if (!isLegal)
return false;
AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
bool isSEXTLoad = false, isNonExt;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
isNonExt = !ST->isTruncatingStore();
} else
return false;
if (Subtarget->isThumb1Only()) {
// Thumb-1 can do a limited post-inc load or store as an updating LDM. It
// must be non-extending/truncating, i32, with an offset of 4.
assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
if (Op->getOpcode() != ISD::ADD || !isNonExt)
return false;
auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!RHS || RHS->getZExtValue() != 4)
return false;
Offset = Op->getOperand(1);
Base = Op->getOperand(0);
AM = ISD::POST_INC;
return true;
}
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
if (!isLegal)
return false;
if (Ptr != Base) {
// Swap base ptr and offset to catch more post-index load / store when
// it's legal. In Thumb2 mode, offset must be an immediate.
if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
!Subtarget->isThumb2())
std::swap(Base, Offset);
// Post-indexed load / store update the base pointer.
if (Ptr != Base)
return false;
}
AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case ARMISD::ADDC:
case ARMISD::ADDE:
case ARMISD::SUBC:
case ARMISD::SUBE:
// These nodes' second result is a boolean
if (Op.getResNo() == 0)
break;
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
if (Known.isUnknown())
return;
KnownBits KnownRHS;
DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
Known.Zero &= KnownRHS.Zero;
Known.One &= KnownRHS.One;
return;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
}
case ARMISD::BFI: {
// Conservatively, we can recurse down the first operand
// and just mask out all affected bits.
DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
const APInt &Mask = CI->getAPIntValue();
Known.Zero &= Mask;
Known.One &= Mask;
return;
}
}
}
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
// Looking for "rev" which is V6+.
if (!Subtarget->hasV6Ops())
return false;
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
AsmStr = AsmPieces[0];
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t,");
// rev $0, $1
if (AsmPieces.size() == 3 &&
AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (Ty && Ty->getBitWidth() == 32)
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
}
return false;
}
const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in a register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasVFP2())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'l': return C_RegisterClass;
case 'w': return C_RegisterClass;
case 'h': return C_RegisterClass;
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
case 'j': return C_Other; // Constant for movw.
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as an 'r' memory constraint.
case 'Q': return C_Memory;
}
} else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default: break;
// All 'U+' constraints are addresses.
case 'U': return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
ARMTargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'l':
if (type->isIntegerTy()) {
if (Subtarget->isThumb())
weight = CW_SpecificReg;
else
weight = CW_Register;
}
break;
case 'w':
if (type->isFloatingPointTy())
weight = CW_Register;
break;
}
return weight;
}
typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'h': // High regs or no regs.
if (Subtarget->isThumb())
return RCPair(0U, &ARM::hGPRRegClass);
break;
case 'r':
if (Subtarget->isThumb1Only())
return RCPair(0U, &ARM::tGPRRegClass);
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
if (VT == MVT::Other)
break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPRRegClass);
if (VT.getSizeInBits() == 128)
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
if (VT == MVT::Other)
break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPR_8RegClass);
if (VT.getSizeInBits() == 128)
return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
break;
}
}
if (StringRef("{cc}").equals_lower(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'j':
case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
int64_t CVal64 = C->getSExtValue();
int CVal = (int) CVal64;
// None of these constraints allow values larger than 32 bits. Check
// that the value fits in an int.
if (CVal != CVal64)
return;
switch (ConstraintLetter) {
case 'j':
// Constant suitable for movw, must be between 0 and
// 65535.
if (Subtarget->hasV6T2Ops())
if (CVal >= 0 && CVal <= 65535)
break;
return;
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD
// immediates.
if (CVal >= 0 && CVal <= 255)
break;
} else if (Subtarget->isThumb2()) {
// A constant that can be used as an immediate value in a
// data-processing instruction.
if (ARM_AM::getT2SOImmVal(CVal) != -1)
break;
} else {
// A constant that can be used as an immediate value in a
// data-processing instruction.
if (ARM_AM::getSOImmVal(CVal) != -1)
break;
}
return;
case 'J':
if (Subtarget->isThumb1Only()) {
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
// not useful otherwise but is implemented for compatibility.
if (CVal >= -255 && CVal <= -1)
break;
} else {
// This must be a constant between -4095 and 4095. It is not clear
// what this constraint is intended for. Implemented for
// compatibility with GCC.
if (CVal >= -4095 && CVal <= 4095)
break;
}
return;
case 'K':
if (Subtarget->isThumb1Only()) {
// A 32-bit value where only one byte has a nonzero value. Exclude
// zero to match GCC. This constraint is used by GCC internally for
// constants that can be loaded with a move/shift combination.
// It is not useful otherwise but is implemented for compatibility.
if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
break;
} else if (Subtarget->isThumb2()) {
// A constant whose bitwise inverse can be used as an immediate
// value in a data-processing instruction. This can be used in GCC
// with a "B" modifier that prints the inverted value, for use with
// BIC and MVN instructions. It is not useful otherwise but is
// implemented for compatibility.
if (ARM_AM::getT2SOImmVal(~CVal) != -1)
break;
} else {
// A constant whose bitwise inverse can be used as an immediate
// value in a data-processing instruction. This can be used in GCC
// with a "B" modifier that prints the inverted value, for use with
// BIC and MVN instructions. It is not useful otherwise but is
// implemented for compatibility.
if (ARM_AM::getSOImmVal(~CVal) != -1)
break;
}
return;
case 'L':
if (Subtarget->isThumb1Only()) {
// This must be a constant in the range [-7, 7) for 3-operand
// ADD/SUB immediate instructions.
if (CVal >= -7 && CVal < 7)
break;
} else if (Subtarget->isThumb2()) {
// A constant whose negation can be used as an immediate value in a
// data-processing instruction. This can be used in GCC with an "n"
// modifier that prints the negated value, for use with SUB
// instructions. It is not useful otherwise but is implemented for
// compatibility.
if (ARM_AM::getT2SOImmVal(-CVal) != -1)
break;
} else {
// A constant whose negation can be used as an immediate value in a
// data-processing instruction. This can be used in GCC with an "n"
// modifier that prints the negated value, for use with SUB
// instructions. It is not useful otherwise but is implemented for
// compatibility.
if (ARM_AM::getSOImmVal(-CVal) != -1)
break;
}
return;
case 'M':
if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
break;
} else {
// A power of two or a constant between 0 and 32. This is used in
// GCC for the shift amount on shifted register operands, but it is
// useful in general for any shift amounts.
if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
break;
}
return;
case 'N':
if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a constant between 0 and 31, for shift amounts.
if (CVal >= 0 && CVal <= 31)
break;
}
return;
case 'O':
if (Subtarget->isThumb()) { // FIXME thumb2
// This must be a multiple of 4 between -508 and 508, for
// ADD/SUB sp = sp + immediate.
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
break;
}
return;
}
Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
static RTLIB::Libcall getDivRemLibcall(
const SDNode *N, MVT::SimpleValueType SVT) {
assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
"Unhandled Opcode in getDivRemLibcall");
bool isSigned = N->getOpcode() == ISD::SDIVREM ||
N->getOpcode() == ISD::SREM;
RTLIB::Libcall LC;
switch (SVT) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
return LC;
}
static TargetLowering::ArgListTy getDivRemArgList(
const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
"Unhandled Opcode in getDivRemArgList");
bool isSigned = N->getOpcode() == ISD::SDIVREM ||
N->getOpcode() == ISD::SREM;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
EVT ArgVT = N->getOperand(i).getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*Context);
Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
Entry.IsSExt = isSigned;
Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
if (Subtarget->isTargetWindows() && Args.size() >= 2)
std::swap(Args[0], Args[1]);
return Args;
}
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
"Invalid opcode for Div/Rem lowering");
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
SDLoc dl(Op);
// If the target has hardware divide, use divide + multiply + subtract:
// div = a / b
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
: Subtarget->hasDivideInARMMode();
if (hasDivide && Op->getValueType(0).isSimple() &&
Op->getSimpleValueType(0) == MVT::i32) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
const SDValue Divisor = Op->getOperand(1);
SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
SDValue Values[2] = {Div, Rem};
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
}
RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
VT.getSimpleVT().SimpleTy);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
DAG.getContext(),
Subtarget);
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = StructType::get(Ty, Ty);
if (Subtarget->isTargetWindows())
InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
// Lowers REM using divmod helpers; see RTABI sections 4.2/4.3.
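// e.g. a 32-bit signed srem becomes a call returning {quotient, remainder};
// on AEABI targets the callee is typically __aeabi_idivmod, and only the
// second element of the returned struct is used here.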
SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
// Build return types (div and rem)
std::vector<Type*> RetTyParams;
Type *RetTyElement;
switch (N->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
}
RetTyParams.push_back(RetTyElement);
RetTyParams.push_back(RetTyElement);
ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
Type *RetTy = StructType::get(*DAG.getContext(), ret);
RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
SimpleTy);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
Subtarget);
bool isSigned = N->getOpcode() == ISD::SREM;
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
if (Subtarget->isTargetWindows())
InChain = WinDBZCheckDenominator(DAG, N, InChain);
// Lower call
CallLoweringInfo CLI(DAG);
CLI.setChain(InChain)
.setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// Return second (rem) result operand (first contains div)
SDNode *ResNode = CallResult.first.getNode();
assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
return ResNode->getOperand(1);
}
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "unsupported target platform");
SDLoc DL(Op);
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
DAG.getConstant(2, DL, MVT::i32));
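// Windows' __chkstk takes the allocation size in 4-byte words in r4, hence
// the srl by 2 above; the adjusted stack pointer is read back from sp below.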
SDValue Flag;
Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
Chain = NewSP.getValue(1);
SDValue Ops[2] = { NewSP, Chain };
return DAG.getMergeValues(Ops, DL);
}
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
"Unexpected type for custom-lowering FP_EXTEND");
RTLIB::Libcall LC =
RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
SDLoc(Op)).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOperand(0).getValueType() == MVT::f64 &&
Subtarget->isFPOnlySP() &&
"Unexpected type for custom-lowering FP_ROUND");
RTLIB::Libcall LC =
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
SDLoc(Op)).first;
}
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
return false;
}
bool ARM::isBitFieldInvertedMask(unsigned v) {
if (v == 0xffffffff)
return false;
// There can be ones on either or both "outsides"; all the "inside"
// bits must be zeros.
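// e.g. 0xF000000F is accepted (~v == 0x0FFFFFF0, one contiguous run of
// ones), while 0xF0F0F0F0 is rejected.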
return isShiftedMask_32(~v);
}
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (!Subtarget->hasVFP3())
return false;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
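// e.g. a vld3 returning { <8 x i8>, <8 x i8>, <8 x i8> } spans 192 bits,
// giving NumElts = 3 and memVT = v3i64.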
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile loads with NEON intrinsics not supported
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
Info.vol = false; // volatile stores with NEON intrinsics not supported
Info.readMem = false;
Info.writeMem = true;
return true;
}
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
return true;
}
case Intrinsic::arm_stlex:
case Intrinsic::arm_strex: {
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
}
case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = 8;
Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
case Intrinsic::arm_ldaexd:
case Intrinsic::arm_ldrexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 8;
Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
return true;
default:
break;
}
return false;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
if (Bits == 0 || Bits > 32)
return false;
return true;
}
bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
// First, if the target has no DMB, see what fallback we can use.
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
return Builder.CreateCall(MCR, args);
} else {
// Instead of using barriers, atomic accesses on these subtargets use
// libcalls.
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain);
return Builder.CreateCall(DMB, CDomain);
}
}
// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/non-atomic");
case AtomicOrdering::Monotonic:
case AtomicOrdering::Acquire:
return nullptr; // Nothing to do
case AtomicOrdering::SequentiallyConsistent:
if (!Inst->hasAtomicStore())
return nullptr; // Nothing to do
/*FALLTHROUGH*/
case AtomicOrdering::Release:
case AtomicOrdering::AcquireRelease:
if (Subtarget->preferISHSTBarriers())
return makeDMB(Builder, ARM_MB::ISHST);
// FIXME: add a comment with a link to documentation justifying this.
else
return makeDMB(Builder, ARM_MB::ISH);
}
llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}
Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
switch (Ord) {
case AtomicOrdering::NotAtomic:
case AtomicOrdering::Unordered:
llvm_unreachable("Invalid fence: unordered/not-atomic");
case AtomicOrdering::Monotonic:
case AtomicOrdering::Release:
return nullptr; // Nothing to do
case AtomicOrdering::Acquire:
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
return makeDMB(Builder, ARM_MB::ISH);
}
llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
return (Size == 64) && !Subtarget->isMClass();
}
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
// anything for those.
// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
// guarantee, see DDI0406C ARM architecture reference manual,
// sections A8.8.72-74 LDRD)
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
: AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
? AtomicExpansionKind::LLSC
: AtomicExpansionKind::None;
}
bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
bool hasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
}
bool ARMTargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
return InsertFencesForAtomic;
}
// This has so far only been implemented for MachO.
bool ARMTargetLowering::useLoadStackGuardNode() const {
return Subtarget->isTargetMachO();
}
bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const {
// If we do not have NEON, vector types are not natively supported.
if (!Subtarget->hasNEON())
return false;
// Floating point values and vector values map to the same register file.
// Therefore, although we could do a store extract of a vector type, this is
// better to leave at float as we have more freedom in the addressing mode for
// those.
if (VectorTy->isFPOrFPVectorTy())
return false;
// If the index is unknown at compile time, this is very expensive to lower
// and it is not possible to combine the store with the extract.
if (!isa<ConstantInt>(Idx))
return false;
assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
// We can do a store + vector extract on any vector that fits perfectly in a D
// or Q register.
if (BitWidth == 64 || BitWidth == 128) {
Cost = 0;
return true;
}
return false;
}
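// Illustrative example of the checks above (not exhaustive): storing
// extractelement <4 x i32> %v, i32 2 qualifies, since the vector is integer,
// 128 bits wide, and the index is constant, so Cost is reported as 0 and the
// extract can be folded into a single lane store. A <4 x float> source or a
// variable index is declined.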
bool ARMTargetLowering::isCheapToSpeculateCttz() const {
return Subtarget->hasV6T2Ops();
}
bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasV6T2Ops();
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
// single i64 here.
if (ValTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
Function *Ldrex = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldrex, Addr),
cast<PointerType>(Addr->getType())->getElementType());
}
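// A minimal sketch of the IR the 64-bit path above builds (value names are
// invented for illustration):
//   %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr)
//   %lo = extractvalue { i32, i32 } %lohi, 0
//   %hi = extractvalue { i32, i32 } %lohi, 1
//   %lo64 = zext i32 %lo to i64
//   %hi64 = zext i32 %hi to i64
//   %hi.shl = shl i64 %hi64, 32
//   %val64 = or i64 %lo64, %hi.shl
// On big-endian subtargets, Lo and Hi are swapped before the recombination.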
void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {
if (!Subtarget->hasV7Ops())
return;
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
}
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal types, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
Function *Strex = Intrinsic::getDeclaration(M, Int);
Type *Int32Ty = Type::getInt32Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Strex, {Lo, Hi, Addr});
}
Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
Type *Tys[] = { Addr->getType() };
Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateCall(
Strex, {Builder.CreateZExtOrBitCast(
Val, Strex->getFunctionType()->getParamType(0)),
Addr});
}
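// The corresponding sketch for the 64-bit store-conditional path above
// (again with invented names):
//   %lo = trunc i64 %val to i32
//   %hi.shr = lshr i64 %val, 32
//   %hi = trunc i64 %hi.shr to i32
//   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)
// strexd returns 0 on success and 1 if the exclusive monitor was cleared,
// so the expansion loops until the returned status is 0.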
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
unsigned
ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const {
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
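// For example, a <16 x i32> vector is 512 bits wide, so this reports
// (512 + 127) / 128 = 4 interleaved accesses, while a 64-bit <8 x i8>
// rounds up to a single access.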
bool ARMTargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the vector doesn't have f16 elements. Even though we could do an
// i16 vldN, we can't hold the f16 vectors and will end up converting via
// f32.
if (VecTy->getElementType()->isHalfTy())
return false;
// Ensure the number of vector elements is greater than 1.
if (VecTy->getNumElements() < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32)
return false;
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
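// A few concrete cases under the rules above (illustrative): <8 x i8>
// (64 bits) and <4 x i32> (128 bits) are legal, and <16 x i32> (512 bits)
// is legal but will be split into multiple accesses; <4 x half> fails the
// f16 check and <2 x i64> fails the element-size check (ElSize == 64).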
/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
bool ARMTargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
VectorType *VecTy = Shuffles[0]->getType();
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long
// as the vector size is divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
// A pointer vector cannot be the return type of the ldN intrinsics. We need
// to load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
VecTy = VectorType::get(VecTy->getVectorElementType(),
VecTy->getVectorNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr, VecTy->getVectorElementType()->getPointerTo(
LI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
Type *Tys[] = {VecTy, Int8Ptr};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
Function *VldnFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(
BaseAddr, VecTy->getVectorNumElements() * Factor);
SmallVector<Value *, 2> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
Ops.push_back(Builder.getInt32(LI->getAlignment()));
CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
// Replace uses of each shufflevector with the corresponding vector loaded
// by ldN.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SV = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(VldN, Index);
// Convert the integer vector to a pointer vector if the element type is a
// pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, VectorType::get(SV->getType()->getVectorElementType(),
VecTy->getVectorNumElements()));
SubVecs[SV].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
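// A sketch of the NumLoads > 1 case above (illustrative, extending the
// Factor = 2 example in the doc comment): with <8 x i32> shuffle results,
// VecTy is 256 bits and needs two accesses, so it is reset to <4 x i32> and
// the loop emits two vld2 calls of { <4 x i32>, <4 x i32> }, the second at
// an offset of 4 * 2 = 8 elements; the two halves for each shufflevector
// are then concatenated into the final <8 x i32> values.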
/// \brief Lower an interleaved store into a vstN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vst3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
VectorType *VecTy = SVI->getType();
assert(VecTy->getVectorNumElements() % Factor == 0 &&
"Invalid interleaved store");
unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
Type *EltTy = VecTy->getVectorElementType();
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long
// as the vector size is divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
// Convert to the corresponding integer vector.
Type *IntVecTy =
VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = VectorType::get(IntTy, LaneLen);
}
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
SI->getPointerAddressSpace()));
}
assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
auto Mask = SVI->getShuffleMask();
Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
Type *Tys[] = {Int8Ptr, SubVecTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
// If we're generating more than one store, compute the base address of
// subsequent stores as an offset from the previous one.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
Function *VstNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
// Split the shufflevector operands into sub vectors for the new vstN call.
for (unsigned i = 0; i < Factor; i++) {
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: If all elements in a chunk are undef, StartMask = 0!
// Note: Filling undef gaps with arbitrary elements is OK, since those
// elements were being written anyway (with undefs).
// In the all-undef case we default to using elements starting from 0.
// Note: StartMask cannot be negative; that is checked in
// isReInterleaveMask.
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
}
Ops.push_back(Builder.getInt32(SI->getAlignment()));
Builder.CreateCall(VstNFunc, Ops);
}
return true;
}
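// As with the interleaved-load path, a sketch of the NumStores > 1 case
// (illustrative): storing a <16 x i32> interleave with Factor = 2 gives
// LaneLen = 8 and an <8 x i32> sub-vector type that needs two accesses, so
// LaneLen drops to 4 and the loop emits two vst2 calls on <4 x i32>
// sub-shuffles, stepping BaseAddr by LaneLen * Factor = 8 elements between
// them.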
enum HABaseType {
HA_UNKNOWN = 0,
HA_FLOAT,
HA_DOUBLE,
HA_VECT64,
HA_VECT128
};
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
uint64_t &Members) {
if (auto *ST = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0; i < ST->getNumElements(); ++i) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
return false;
Members += SubMembers;
}
} else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
return false;
Members += SubMembers * AT->getNumElements();
} else if (Ty->isFloatTy()) {
if (Base != HA_UNKNOWN && Base != HA_FLOAT)
return false;
Members = 1;
Base = HA_FLOAT;
} else if (Ty->isDoubleTy()) {
if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
return false;
Members = 1;
Base = HA_DOUBLE;
} else if (auto *VT = dyn_cast<VectorType>(Ty)) {
Members = 1;
switch (Base) {
case HA_FLOAT:
case HA_DOUBLE:
return false;
case HA_VECT64:
return VT->getBitWidth() == 64;
case HA_VECT128:
return VT->getBitWidth() == 128;
case HA_UNKNOWN:
switch (VT->getBitWidth()) {
case 64:
Base = HA_VECT64;
return true;
case 128:
Base = HA_VECT128;
return true;
default:
return false;
}
}
}
return (Members > 0 && Members <= 4);
}
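// Illustrative examples of the recursion above: struct { float x, y, z; }
// yields Base == HA_FLOAT with Members == 3, a valid HA; double[4] yields
// HA_DOUBLE with four members; struct { float f; double d; } fails because
// the base types conflict; and float[5] fails the Members <= 4 bound.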
/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
/// passing according to AAPCS rules.
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
if (getEffectiveCallingConv(CallConv, isVarArg) !=
CallingConv::ARM_AAPCS_VFP)
return false;
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;
bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
return IsHA || IsIntArray;
}
unsigned ARMTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
// Platforms which do not use SjLj EH may return values in these registers
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
}
unsigned ARMTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Platforms which do not use SjLj EH may return values in these registers
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
}
void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in ARMFunctionInfo.
ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
AFI->setIsSplitCSR(true);
}
void ARMTargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (ARM::GPRRegClass.contains(*I))
RC = &ARM::GPRRegClass;
else if (ARM::DPRRegClass.contains(*I))
RC = &ARM::DPRRegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
unsigned NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction()->hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
Index: head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 322854)
+++ head/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp (revision 322855)
@@ -1,36742 +1,36749 @@
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cctype>
#include <numeric>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> ExperimentalVectorWideningLegalization(
"x86-experimental-vector-widening-legalization", cl::init(false),
cl::desc("Enable an experimental vector type legalization through widening "
"rather than promotion."),
cl::Hidden);
static cl::opt<int> ExperimentalPrefLoopAlignment(
"x86-experimental-pref-loop-alignment", cl::init(4),
cl::desc("Sets the preferable loop alignment for experiments "
"(the last x86-experimental-pref-loop-alignment bits"
" of the loop header PC will be 0)."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
"SHIFT, LEA, etc."),
cl::Hidden);
/// Call this when the user attempts to do something unsupported, like
/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
/// report_fatal_error, so calling code should attempt to recover without
/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
const char *Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc()));
}
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
X86ScalarSSEf64 = Subtarget.hasSSE2();
X86ScalarSSEf32 = Subtarget.hasSSE1();
MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the TargetLowering object.
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
if (Subtarget.isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget.is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides and use cheaper ones.
if (TM.getOptLevel() >= CodeGenOpt::Default) {
if (Subtarget.hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
addBypassSlowDiv(64, 32);
}
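// For example (a note on the hook, not code from this file),
// addBypassSlowDiv(32, 8) asks codegen to test at run time whether both
// operands of a 32-bit division fit in 8 bits and, if so, use the much
// cheaper 8-bit divide; the 64-to-32 bypass works the same way on 64-bit
// targets with slow 64-bit divides.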
if (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
}
if (Subtarget.isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
} else if (Subtarget.isTargetWindowsGNU()) {
// MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
} else {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
}
// Set up the register classes.
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget.is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SETOEQ and SETUNE require checking two conditions.
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget.is64Bit()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
// f32/f64 are legal, f80 is custom.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!Subtarget.useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
if (!Subtarget.useSoftFloat()) {
// SSE has no i16 to fp conversion, only i32.
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
}
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (!Subtarget.useSoftFloat()) {
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
// f32 and f64 cases are Legal, f80 case is not
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget.is64Bit()) {
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
} else {
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget.useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
} else if (!Subtarget.is64Bit())
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
// the two-result form to trivial CSE, which is able to combine x/y and x%y
// into a single instruction.
//
// Scalar integer multiply-high is also lowered to use two-result
// operations, to match the available instructions. However, plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
}
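// Illustrative sketch of the CSE described above (invented IR): a pair like
//   %q = sdiv i32 %x, %y
//   %r = srem i32 %x, %y
// becomes a single two-result SDIVREM node, whose results map onto the
// quotient and remainder that one x86 idiv instruction already produces.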
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them up to i32, which has a shorter
// encoding.
setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
if (!Subtarget.hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
}
}
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
if (Subtarget.useSoftFloat() ||
(!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
// There's never any support for operations beyond MVT::f32.
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
if (Subtarget.hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
if (!Subtarget.hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
// handling here, but rather a light-weight setjmp/longjmp replacement used
// to support continuations, user-level threading, and so on. As a result,
// no other SjLj exception interfaces are implemented; please don't build
// your own exception handling on top of them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
// Darwin ABI issue.
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::ConstantPool , VT, Custom);
setOperationAction(ISD::JumpTable , VT, Custom);
setOperationAction(ISD::GlobalAddress , VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
setOperationAction(ISD::ExternalSymbol , VT, Custom);
setOperationAction(ISD::BlockAddress , VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
setOperationAction(ISD::SRA_PARTS, VT, Custom);
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
if (Subtarget.hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
!Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
bool Is64Bit = Subtarget.is64Bit();
setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
: &X86::FR64RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG, VT, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
// Lower this to MOVMSK plus an AND.
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
} else if (UseX87 && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
: &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
// Use ANDPS and ORPS to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (UseX87) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
setOperationAction(ISD::UNDEF, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , VT, Expand);
setOperationAction(ISD::FCOS , VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
}
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
addLegalFPImmediate(APFloat(+0.0f)); // FLD0
addLegalFPImmediate(APFloat(+1.0f)); // FLD1
addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
// We don't support FMA.
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87, except f128 in MMX.
if (UseX87) {
if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
addRegisterClass(MVT::f128, &X86::FR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
}
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(InnerVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
// N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
// types, we have to deal with them whether we ask for Expansion or not.
// Setting Expand causes its own optimisation problems though, so leave
// them legal.
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
// split/scalarized right now.
if (VT.getVectorElementType() == MVT::f16)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
}
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
// We support custom legalizing of sext and anyext loads for specific
// memory vector types which we can load as a scalar (or sequence of
// scalars) and extend in-register to a legal 128-bit vector type. For sext
// loads these must work with a single scalar load.
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
}
for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
if (VT == MVT::v2i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
}
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
setOperationAction(ISD::ABS, MVT::v16i8, Legal);
setOperationAction(ISD::ABS, MVT::v8i16, Legal);
setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
setOperationAction(ISD::FCEIL, RoundedTy, Legal);
setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
setOperationAction(ISD::FRINT, RoundedTy, Legal);
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
}
setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
// We directly match byte blends in the backend as they match the VSELECT
// condition form.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
}
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
}
// SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
}
// i8 vectors are custom because the source register and source memory
// operand types are not the same width.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::ROTL, VT, Custom);
// XOP can efficiently perform BITREVERSE with VPPERM.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::BITREVERSE, VT, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
bool HasInt256 = Subtarget.hasInt256();
addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
}
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
}
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
MVT::v2f64, MVT::v4f64 })
setOperationAction(ISD::FMA, VT, Legal);
}
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
}
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}
if (HasInt256) {
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256-bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
// AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
}
}
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
// Custom lower several nodes for 256-bit types.
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
if (HasInt256)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
}
for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
setTruncStoreAction(VT, MaskVT, Custom);
}
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
if (Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
} else {
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
}
}
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
setOperationAction(ISD::SINT_TO_FP, VT, Legal);
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
}
if (Subtarget.hasVLX()) {
// Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
}
}
if (Subtarget.hasVLX()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
// FIXME: These instructions are available on SSE/AVX2, add relevant patterns.
setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
}
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
}
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64,
MVT::v8i64}) {
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
}
// Need to promote to 64-bit even though we have 32-bit masked instructions
// because the IR optimizers rearrange bitcasts around logic ops leaving
// too many variations to handle if we don't promote them.
setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
if (Subtarget.hasCDI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
MVT::v4i64, MVT::v8i64}) {
setOperationAction(ISD::CTLZ, VT, Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
} // Subtarget.hasCDI()
if (Subtarget.hasDQI()) {
// NonVLX sub-targets extend 128/256 vectors to use the 512 version.
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
if (Subtarget.hasVPOPCNTDQ()) {
// VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
// version of popcntd/q.
for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
MVT::v4i32, MVT::v2i64})
setOperationAction(ISD::CTPOP, VT, Legal);
}
// Custom lower several nodes.
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
// Extract subvector is special because the value type
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Custom under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64, MVT::v1i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Legal);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
}
}// has AVX-512
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
setOperationAction(ISD::ADD, MVT::v32i1, Custom);
setOperationAction(ISD::ADD, MVT::v64i1, Custom);
setOperationAction(ISD::SUB, MVT::v32i1, Custom);
setOperationAction(ISD::SUB, MVT::v64i1, Custom);
setOperationAction(ISD::MUL, MVT::v32i1, Custom);
setOperationAction(ISD::MUL, MVT::v64i1, Custom);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
setOperationAction(ISD::MUL, MVT::v64i8, Custom);
setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
if (Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
setOperationAction(ISD::MLOAD, VT, Action);
setOperationAction(ISD::MSTORE, VT, Action);
}
if (Subtarget.hasCDI()) {
setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
}
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
if (Subtarget.hasVLX()) {
// FIXME: These instructions are available on SSE/AVX2, add relevant patterns.
setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
}
}
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
}
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
}
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
// Support carry in as value rather than glue.
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
// Combine sin / cos into one node or libcall if possible.
if (Subtarget.hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget.isTargetDarwin()) {
// For MacOSX, we don't want the normal expansion of a libcall to sincos.
// We want to issue a libcall to __sincos_stret to avoid memory traffic.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
}
// On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64 bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
// function casting to f64 and calling `fmod`.
if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
{ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
ISD::FLOG10, ISD::FPOW, ISD::FSIN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
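// A hedged sketch of what that Promote amounts to for one call site (the
// exact lowering is produced by the generic legalizer and may differ in
// detail):
//
//   float r = fmodf(x, y);                       // source
//   float r = (float)fmod((double)x, (double)y); // after promotion
//
// which mirrors the inline fmodf wrapper in the MSVC CRT headers.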
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::FMINNUM);
setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::MGATHER);
computeRegisterProperties(Subtarget.getRegisterInfo());
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
// TODO: These control memcmp expansion in CGP and could be raised higher, but
// that needs to be benchmarked and balanced with the potential use of vector
// load/store types (PR33329, PR33914).
MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
verifyIntrinsicTables();
}
// This has so far only been implemented for 64-bit MachO.
bool X86TargetLowering::useLoadStackGuardNode() const {
return Subtarget.isTargetMachO() && Subtarget.is64Bit();
}
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(EVT VT) const {
if (ExperimentalVectorWideningLegalization &&
VT.getVectorNumElements() != 1 &&
VT.getVectorElementType().getSimpleVT() != MVT::i1)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
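// For example, with ExperimentalVectorWideningLegalization enabled on an
// SSE2 target, an illegal MVT::v2i16 is widened (eventually to v8i16)
// instead of being promoted to v2i32, while single-element and vXi1 types
// keep the default action. A minimal sketch of querying this, assuming TLI
// is this lowering object:
//
//   // if (TLI.getPreferredVectorAction(EVT(MVT::v2i16)) ==
//   //     TargetLoweringBase::TypeWidenVector) { ... }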
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext& Context,
EVT VT) const {
if (!VT.isVector())
return MVT::i8;
if (VT.isSimple()) {
MVT VVT = VT.getSimpleVT();
const unsigned NumElts = VVT.getVectorNumElements();
MVT EltVT = VVT.getVectorElementType();
if (VVT.is512BitVector()) {
if (Subtarget.hasAVX512())
if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
EltVT == MVT::f32 || EltVT == MVT::f64)
switch(NumElts) {
case 8: return MVT::v8i1;
case 16: return MVT::v16i1;
}
if (Subtarget.hasBWI())
if (EltVT == MVT::i8 || EltVT == MVT::i16)
switch(NumElts) {
case 32: return MVT::v32i1;
case 64: return MVT::v64i1;
}
}
if (Subtarget.hasBWI() && Subtarget.hasVLX())
return MVT::getVectorVT(MVT::i1, NumElts);
if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
EVT LegalVT = getTypeToTransformTo(Context, VT);
EltVT = LegalVT.getVectorElementType().getSimpleVT();
}
if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
switch(NumElts) {
case 2: return MVT::v2i1;
case 4: return MVT::v4i1;
case 8: return MVT::v8i1;
}
}
return VT.changeVectorElementTypeToInteger();
}
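// Worked examples of the rules above, under the stated features:
//   AVX-512,       setcc on v16f32 -> MVT::v16i1
//   AVX512BW,      setcc on v32i16 -> MVT::v32i1
//   AVX-512 + VLX, setcc on v4i32  -> MVT::v4i1
//   plain AVX2,    setcc on v8f32  -> v8i32 (integer of element width)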
/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
}
/// Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
}
unsigned Align = 4;
if (Subtarget.hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
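// A worked example, assuming a 32-bit target with SSE enabled:
//
//   // struct S { int a; __m128 v; };  // contains a 128-bit vector member
//   // getByValTypeAlignment(S) == 16  // vector member forces 16 bytes
//   // struct T { int a; double d; };
//   // getByValTypeAlignment(T) == 4   // no SSE vector, default 4 bytes
//
// On 64-bit targets the result is simply max(8, ABI alignment of Ty).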
/// Returns the target-specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against the alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(!Subtarget.isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
// FIXME: Check if unaligned 32-byte accesses are slow.
if (Size >= 32 && Subtarget.hasAVX()) {
// Although this isn't a well-supported type for AVX1, we'll let
// legalization and shuffle lowering produce the optimal codegen. If we
// choose an optimal type with a vector element larger than a byte,
// getMemsetStores() may create an intermediate splat (using an integer
// multiply) before we splat as a vector.
return MVT::v32i8;
}
if (Subtarget.hasSSE2())
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
if (Subtarget.hasSSE1())
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
// Also, do not use f64 to lower memset unless this is a memset of zeros.
// The gymnastics of splatting a byte value into an XMM register and then
// only using 8-byte stores (because this is a CPU with slow unaligned
// 16-byte accesses) makes that a loser.
return MVT::f64;
}
}
// This is a compromise. If we reach here, unaligned accesses may be slow on
// this target. However, creating smaller, aligned accesses could be even
// slower and would certainly be a lot more code.
if (Subtarget.is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
}
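// Illustrative outcomes of the logic above (assuming NoImplicitFloat is not
// set and unaligned 16-byte access is fast or the operands are aligned):
//   memset of 64 bytes with AVX            -> MVT::v32i8
//   memcpy of 32 bytes with SSE2           -> MVT::v16i8
//   zero-memset of 8 bytes, 32-bit + SSE2  -> MVT::f64
//   otherwise, 64-bit target and Size >= 8 -> MVT::i64, else MVT::i32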
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
return X86ScalarSSEf32;
else if (VT == MVT::f64)
return X86ScalarSSEf64;
return true;
}
bool
X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
if (Fast) {
switch (VT.getSizeInBits()) {
default:
// 8-byte and under are always assumed to be fast.
*Fast = true;
break;
case 128:
*Fast = !Subtarget.isUnalignedMem16Slow();
break;
case 256:
*Fast = !Subtarget.isUnalignedMem32Slow();
break;
// TODO: What about AVX-512 (512-bit) accesses?
}
}
// Misaligned accesses of any size are always allowed.
return true;
}
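// Usage sketch: callers generally probe both legality and speed, e.g.
//
//   // bool Fast = false;
//   // if (TLI.allowsMisalignedMemoryAccesses(MVT::v8f32, AddrSpace, Align,
//   //                                        &Fast) && Fast)
//   //   ... emit one unaligned 32-byte load ...
//
// On x86 the call itself always succeeds; only *Fast varies by subtarget.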
/// Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (isPositionIndependent() && Subtarget.isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
bool X86TargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
ArgListTy &Args) const {
// Only relabel X86-32 for C / Stdcall CCs.
if (Subtarget.is64Bit())
return;
if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
return;
unsigned ParamRegs = 0;
if (auto *M = MF->getFunction()->getParent())
ParamRegs = M->getNumberRegisterParameters();
// Mark the first N integer arguments as being passed in registers.
for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
Type *T = Args[Idx].Ty;
if (T->isPointerTy() || T->isIntegerTy())
if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
unsigned numRegs = 1;
if (MF->getDataLayout().getTypeAllocSize(T) > 4)
numRegs = 2;
if (ParamRegs < numRegs)
return;
ParamRegs -= numRegs;
Args[Idx].IsInReg = true;
}
}
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid, MCContext &Ctx) const {
assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
/// Returns relocation base for the given PIC jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget.is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable,
/// the same as getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
if (Subtarget.isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}
std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
case MVT::v8f32: case MVT::v4f64:
case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
case MVT::v16f32: case MVT::v8f64:
RRC = &X86::VR128XRegClass;
break;
}
return std::make_pair(RRC, Cost);
}
unsigned X86TargetLowering::getAddressSpace() const {
if (Subtarget.is64Bit())
return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
return 256;
}
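// Address spaces 256 and 257 are the x86 backend's conventions for
// %gs-relative and %fs-relative addressing. A hedged sketch of what a
// guard-slot pointer looks like at the IR level on x86-64 (non-kernel):
//
//   // i8* addrspace(257)* ; i.e. a pointer dereferenced relative to %fs
//
// The kernel code model swaps to %gs, hence the 256 result above.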
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
(TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}
static Constant* SegmentOffset(IRBuilder<> &IRB,
unsigned Offset, unsigned AddressSpace) {
return ConstantExpr::getIntToPtr(
ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
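// For instance, SegmentOffset(IRB, 0x28, 257) produces roughly
//
//   // inttoptr (i32 40 to i8* addrspace(257)*)
//
// a pointer-to-pointer at %fs:0x28 on x86-64 (a sketch; the exact printed
// IR spelling can vary between LLVM versions).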
Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
// glibc, bionic, and Fuchsia have a special slot for the stack guard in
// tcbhead_t; use it instead of the usual global variable (see
// sysdeps/{i386,x86_64}/nptl/tls.h)
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
if (Subtarget.isTargetFuchsia()) {
// <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
return SegmentOffset(IRB, 0x10, getAddressSpace());
} else {
// %fs:0x28, unless we're using a Kernel code model, in which case
// it's %gs:0x28. gs:0x14 on i386.
unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
return SegmentOffset(IRB, Offset, getAddressSpace());
}
}
return TargetLowering::getIRStackGuard(IRB);
}
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// The MSVC CRT provides functionality for stack protection.
if (Subtarget.getTargetTriple().isOSMSVCRT()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
auto *SecurityCheckCookie = cast<Function>(
M.getOrInsertFunction("__security_check_cookie",
Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext())));
SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
return;
}
// glibc, bionic, and Fuchsia have a special slot for the stack guard.
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
return;
TargetLowering::insertSSPDeclarations(M);
}
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget.getTargetTriple().isOSMSVCRT())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget.getTargetTriple().isOSMSVCRT())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (Subtarget.getTargetTriple().isOSContiki())
return getDefaultSafeStackPointerLocation(IRB, false);
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget.isTargetAndroid()) {
// %fs:0x48, unless we're using a Kernel code model, in which case it's
// %gs:0x48; %gs:0x24 on i386.
unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
return SegmentOffset(IRB, Offset, getAddressSpace());
}
// Fuchsia is similar.
if (Subtarget.isTargetFuchsia()) {
// <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
return SegmentOffset(IRB, 0x18, getAddressSpace());
}
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
assert(SrcAS != DestAS && "Expected different address spaces!");
return SrcAS < 256 && DestAS < 256;
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "X86GenCallingConv.inc"
bool X86TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
return ScratchRegs;
}
/// Lowers mask values (v*i1) to the corresponding register values.
/// \returns the DAG node after lowering to the register type.
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
const SDLoc &Dl, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
// Two stage lowering might be required
// bitcast: v8i1 -> i8 / v16i1 -> i16
// anyextend: i8 -> i32 / i16 -> i32
EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
if (ValLoc == MVT::i32)
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
return ValToCopy;
} else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
(ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
// One stage lowering is required
// bitcast: v32i1 -> i32 / v64i1 -> i64
return DAG.getBitcast(ValLoc, ValArg);
} else
return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
}
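// A sketch of the two-stage case described above, for ValVT == v8i1 and
// ValLoc == i32:
//
//   // SDValue V = lowerMasksToReg(Mask /*v8i1*/, MVT::i32, Dl, DAG);
//   // => bitcast v8i1 -> i8, then any_extend i8 -> i32
//
// whereas v32i1 with an i32 location collapses to a single bitcast.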
/// Breaks a v64i1 value into two registers and adds the new nodes to the DAG.
static void Passv64i1ArgInRegs(
const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&
"Expected AVX512BW or AVX512BMI target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
"The value should reside in two registers");
// Before splitting the value we cast it to i64
Arg = DAG.getBitcast(MVT::i64, Arg);
// Splitting the value into two i32 types
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
DAG.getConstant(0, Dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
DAG.getConstant(1, Dl, MVT::i32));
// Attach the two i32 values to the corresponding registers.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}
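// In effect a v64i1 argument on a 32-bit AVX512BW target travels as
//
//   // v64i1 -> bitcast to i64 -> extract_element 0 / extract_element 1
//   // -> two i32 halves in VA.getLocReg() and NextVA.getLocReg()
//
// and getv64i1Argument() further down performs the inverse on the receiving
// side.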
SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
// In some cases we need to disable registers from the default CSR list.
// For example, when they are used for argument passing.
bool ShouldDisableCalleeSavedRegister =
CallConv == CallingConv::X86_RegCall ||
MF.getFunction()->hasFnAttribute("no_caller_saved_registers");
if (CallConv == CallingConv::X86_INTR && !Outs.empty())
report_fatal_error("X86 interrupts may not return any value");
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
MVT::i32));
// Copy the result values into the output registers.
for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// Add the register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
// Promote values to the appropriate types.
if (VA.getLocInfo() == CCValAssign::SExt)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt) {
if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
else
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
}
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
} else if (ValVT == MVT::f64 &&
(Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::FP0 ||
VA.getLocReg() == X86::FP1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetOps.push_back(ValToCopy);
// Don't emit a copytoreg.
continue;
}
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget.is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget.hasSSE2())
ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
}
}
}
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
Subtarget);
assert(2 == RegsToPass.size() &&
"Expecting two registers after Pass64BitArgInRegs");
// Add the second register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
// Add nodes to the DAG and add the values into the RetOps list
for (auto &Reg : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
}
}
// Swift calling convention does not require we copy the sret argument
// into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
//
// Checking Function.hasStructRetAttr() here is insufficient because the IR
// may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
// false, then an sret argument may be implicitly inserted in the SelDAG. In
// either case FuncInfo->setSRetReturnReg() will have been called.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
// When we have both sret and another return value, we should use the
// original Chain stored in RetOps[0], instead of the current Chain updated
// in the above loop. If we only have sret, RetOps[0] equals Chain.
// For the case of sret and another return value, we have
// Chain_0 at the function entry
// Chain_1 = getCopyToReg(Chain_0) in the above loop
// If we use Chain_1 in getCopyFromReg, we will have
// Val = getCopyFromReg(Chain_1)
// Chain_2 = getCopyToReg(Chain_1, Val) from below
// getCopyToReg(Chain_0) will be glued together with
// getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
// in Unit B, and we will have cyclic dependency between Unit A and Unit B:
// Data dependency from Unit B to Unit A due to usage of Val in
// getCopyToReg(Chain_1, Val)
// Chain dependency from Unit A to Unit B
// So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg
= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
// Add the returned register to the CalleeSaveDisableRegs list.
if (ShouldDisableCalleeSavedRegister)
MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (X86::GR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
X86ISD::NodeType opcode = X86ISD::RET_FLAG;
if (CallConv == CallingConv::X86_INTR)
opcode = X86ISD::IRET;
return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
// If we are returning more than one value, we can definitely
// not make a tail call; see PR19530.
if (UI->getNumOperands() > 4)
return false;
if (UI->getNumOperands() == 4 &&
UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT = MVT::i32;
bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
// The ABI does not require i1, i8 or i16 to be extended.
//
// On Darwin, there is code in the wild relying on Clang's old behaviour of
// always extending i8/i16 return values, so keep doing that for now.
// (PR26665).
ReturnMVT = MVT::i8;
}
EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
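// Concrete consequences of the rule above:
//   i8 return, non-Darwin: ReturnMVT = i8, so the value is not extended.
//   i8 return, Darwin:     ReturnMVT stays i32, so it is extended to i32
//                          (the legacy behaviour kept for PR26665).
//   i1 return, anywhere:   extended only to i8.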
/// Reads two 32 bit registers and creates a 64 bit mask value.
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InFlag Represents the SDValue in the parent DAG node for
///                        glue purposes. In case the DAG is already using a
///                        physical register instead of a virtual one, we
///                        should glue our new SDValue to the InFlag SDValue.
/// \return a new 64-bit SDValue.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
const SDLoc &Dl, const X86Subtarget &Subtarget,
SDValue *InFlag = nullptr) {
assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type");
assert(NextVA.getValVT() == VA.getValVT() &&
"The locations should have the same type");
assert(VA.isRegLoc() && NextVA.isRegLoc() &&
"The values should reside in two registers");
SDValue Lo, Hi;
unsigned Reg;
SDValue ArgValueLo, ArgValueHi;
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterClass *RC = &X86::GR32RegClass;
// Read a 32 bit value from the registers
if (nullptr == InFlag) {
// When no physical register is present,
// create an intermediate virtual register
Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
} else {
// When a physical register is available read the value from it and glue
// the reads together.
ArgValueLo =
DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
*InFlag = ArgValueLo.getValue(2);
ArgValueHi =
DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
*InFlag = ArgValueHi.getValue(2);
}
// Convert the lower i32 value into a v32i1 mask
Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
// Convert the upper i32 value into a v32i1 mask
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
// Concatenate the two halves into the full v64i1 mask
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
/// Lower a register of various sizes (8/16/32/64)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node containing the operand after lowering to mask type.
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
const EVT &ValLoc, const SDLoc &Dl,
SelectionDAG &DAG) {
SDValue ValReturned = ValArg;
if (ValVT == MVT::v1i1)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
if (ValVT == MVT::v64i1) {
// On a 32 bit target, this case is handled by getv64i1Argument
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
// On a 64 bit target, there is no need to truncate the value, only bitcast it
} else {
MVT maskLen;
switch (ValVT.getSimpleVT().SimpleTy) {
case MVT::v8i1:
maskLen = MVT::i8;
break;
case MVT::v16i1:
maskLen = MVT::i16;
break;
case MVT::v32i1:
maskLen = MVT::i32;
break;
default:
llvm_unreachable("Expecting a vector of i1 types");
}
ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
}
return DAG.getBitcast(ValVT, ValReturned);
}
/// Lower the result values of a call into the
/// appropriate copies out of the physical registers they were returned in.
///
SDValue X86TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
uint32_t *RegMask) const {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget.is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
++I, ++InsIndex) {
CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
// In some calling conventions we need to remove the used registers
// from the register mask.
if (RegMask) {
for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
}
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
}
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
bool RoundAfterCopy = false;
if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
if (!Subtarget.hasX87())
report_fatal_error("X87 register return with X87 disabled");
CopyVT = MVT::f80;
RoundAfterCopy = (CopyVT != VA.getLocVT());
}
SDValue Val;
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Val =
getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
.getValue(1);
Val = Chain.getValue(0);
InFlag = Chain.getValue(2);
}
if (RoundAfterCopy)
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1, dl));
if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
if (VA.getValVT().isVector() &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
} else
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
}
InVals.push_back(Val);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// The StdCall calling convention is standard for many Windows API
// routines. It differs from the C calling convention only slightly: the
// callee cleans up the stack instead of the caller, and symbols are
// decorated in a special way. It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
enum StructReturnType {
NotStructReturn,
RegStructReturn,
StackStructReturn
};
static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
if (Outs.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
/// Determines whether a function uses struct return semantics.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
if (Ins.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
/// Make a copy of an aggregate at the address specified by "Src" to the
/// address "Dst", with size and alignment information specified by the byval
/// parameter attribute. The copy will be passed as a byval function parameter.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
/*isTailCall*/false,
MachinePointerInfo(), MachinePointerInfo());
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
CC == CallingConv::HHVM);
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
// C calling conventions:
case CallingConv::C:
case CallingConv::Win64:
case CallingConv::X86_64_SysV:
// Callee pop conventions:
case CallingConv::X86_ThisCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_VectorCall:
case CallingConv::X86_FastCall:
return true;
default:
return canGuaranteeTCO(CC);
}
}
/// Return true if the function is being made into a tailcall target by
/// changing its ABI.
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
auto Attr =
CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
ImmutableCallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!mayTailCallThisCC(CalleeCC))
return false;
return true;
}
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo &MFI, unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = shouldGuaranteeTCO(
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
MVT PtrVT = getPointerTy(DAG.getDataLayout());
// If the value is passed by pointer, we have the address passed instead of
// the value itself. No need to extend if the mask value and location share
// the same absolute size.
bool ExtendedInMem =
VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
// Calculate the SP offset of an interrupt parameter, re-arranging the slot
// normally taken by a return address.
int Offset = 0;
if (CallConv == CallingConv::X86_INTR) {
// X86 interrupts may take one or two arguments.
// Unlike a regular call, there is no return address on the stack.
// The offset of the last argument needs to be set to -4/-8 bytes.
// The offset of the first argument (when there are two) should be set to 0 bytes.
Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
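// The expression above yields 0 for the first of two arguments and -1 slot
// (-4/-8 bytes) for the last argument, matching the layout described above.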
if (Subtarget.is64Bit() && Ins.size() == 2) {
// The stack pointer needs to be realigned for 64 bit handlers with error
// code, so the argument offset changes by 8 bytes.
Offset += 8;
}
}
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In the case of tail call optimization, mark all arguments mutable, since
// they could be overwritten by the lowering of arguments of a tail call.
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
return DAG.getFrameIndex(FI, PtrVT);
}
// This is an argument in memory. We might be able to perform copy elision.
if (Flags.isCopyElisionCandidate()) {
EVT ArgVT = Ins[i].ArgVT;
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
// If this is a one-part value or the first part of a multi-part value,
// create a stack object for the entire argument value type and return a
// load from our portion of it. This assumes that if the first part of an
// argument is in memory, the rest will also be in memory.
int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
/*Immutable=*/false);
PartAddr = DAG.getFrameIndex(FI, PtrVT);
return DAG.getLoad(
ValVT, dl, Chain, PartAddr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
} else {
// This is not the first piece of an argument in memory. See if there is
// already a fixed stack object including this offset. If so, assume it
// was created by the PartOffset == 0 branch above and create a load from
// the appropriate offset into it.
int64_t PartBegin = VA.getLocMemOffset();
int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
int FI = MFI.getObjectIndexBegin();
for (; MFI.isFixedObjectIndex(FI); ++FI) {
int64_t ObjBegin = MFI.getObjectOffset(FI);
int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
break;
}
if (MFI.isFixedObjectIndex(FI)) {
SDValue Addr =
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
return DAG.getLoad(
ValVT, dl, Chain, Addr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
Ins[i].PartOffset));
}
}
}
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
VA.getLocMemOffset(), isImmutable);
// Set SExt or ZExt flag.
if (VA.getLocInfo() == CCValAssign::ZExt) {
MFI.setObjectZExt(FI, true);
} else if (VA.getLocInfo() == CCValAssign::SExt) {
MFI.setObjectSExt(FI, true);
}
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
return ExtendedInMem
? (VA.getValVT().isVector()
? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
: DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
: Val;
}
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
const X86Subtarget &Subtarget) {
assert(Subtarget.is64Bit());
if (Subtarget.isCallingConvWin64(CallConv)) {
static const MCPhysReg GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
}
static const MCPhysReg GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
}
// FIXME: Get this from tablegen.
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
CallingConv::ID CallConv,
const X86Subtarget &Subtarget) {
assert(Subtarget.is64Bit());
if (Subtarget.isCallingConvWin64(CallConv)) {
// The XMM registers which might contain vararg parameters are shadowed by
// their paired GPRs, so we only need to save the GPRs to their home
// slots.
// TODO: __vectorcall will change this.
return None;
}
const Function *Fn = MF.getFunction();
bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
bool isSoftFloat = Subtarget.useSoftFloat();
assert(!(isSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
// Kernel mode asks for SSE to be disabled, so there are no XMM argument
// registers.
return None;
static const MCPhysReg XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
#ifndef NDEBUG
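// Debug-only helper: verifies that argument locations are sorted by value
// number, an invariant the lowering loops below rely on.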
static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
[](const CCValAssign &A, const CCValAssign &B) -> bool {
return A.getValNo() < B.getValNo();
});
}
#endif
SDValue X86TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const Function *Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget.isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo &MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
assert(
!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
if (CallConv == CallingConv::X86_INTR) {
bool isLegal = Ins.size() == 1 ||
(Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
(!Is64Bit && Ins[1].VT == MVT::i32)));
if (!isLegal)
report_fatal_error("X86 interrupts may take one or two arguments");
}
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
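// The Win64 ABI reserves 32 bytes above the return address as home space
// for the four register arguments (RCX, RDX, R8 and R9).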
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeArguments(Ins, CC_X86);
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
if (CallingConv::X86_VectorCall == CallConv) {
CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
}
// The next loop assumes that the locations are in the same order as the
// input arguments.
assert(isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering");
SDValue ArgValue;
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
++I, ++InsIndex) {
assert(InsIndex < Ins.size() && "Invalid Ins index");
CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
if (VA.needsCustom()) {
assert(
VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// In the regcall calling convention on 32 bit targets, v64i1
// values are split up into two registers.
ArgValue =
getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
} else {
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
else if (RegVT == MVT::f32)
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
else if (RegVT == MVT::f80)
RC = &X86::RFP80RegClass;
else if (RegVT == MVT::f128)
RC = &X86::FR128RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
else if (RegVT.is128BitVector())
RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
else if (RegVT == MVT::v1i1)
RC = &X86::VK1RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
else if (RegVT == MVT::v32i1)
RC = &X86::VK32RegClass;
else if (RegVT == MVT::v64i1)
RC = &X86::VK64RegClass;
else
llvm_unreachable("Unknown argument type!");
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
else if (VA.getValVT().isVector() &&
VA.getValVT().getScalarType() == MVT::i1 &&
((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
(VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
// Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
ArgValue =
LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
}
// If the value is passed via a pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
InVals.push_back(ArgValue);
}
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
// The Swift calling convention does not require us to copy the sret argument
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
if (CallConv == CallingConv::Swift)
continue;
// All x86 ABIs require that for returning structs by value we copy the
// sret argument into %rax/%eax (depending on ABI) for the return. Save
// the argument into a virtual register so that we can access it from the
// return points.
if (Ins[I].Flags.isSRet()) {
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
}
}
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
if (shouldGuaranteeTCO(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes a variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start. We
// can skip this if there are no va_start calls.
if (MFI.hasVAStart() &&
(Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall))) {
FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
}
// Figure out if XMM registers are in use.
assert(!(Subtarget.useSoftFloat() &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
"SSE register cannot be used when SSE is disabled!");
// 64-bit calling conventions support varargs and register parameters, so we
// have to do extra work to spill them in the prologue.
if (Is64Bit && isVarArg && MFI.hasVAStart()) {
// Find the first unallocated GPR and XMM argument registers.
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
// Gather all the live in physical registers.
SmallVector<SDValue, 6> LiveGPRs;
SmallVector<SDValue, 8> LiveXMMRegs;
SDValue ALVal;
for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
LiveGPRs.push_back(
DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
}
if (!ArgXMMs.empty()) {
unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
LiveXMMRegs.push_back(
DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
}
}
if (IsWin64) {
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fixup to set vararg frame on shadow area (4 x i64).
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so
// they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
}
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy(DAG.getDataLayout()));
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
RSFIN, DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(),
FuncInfo->getRegSaveFrameIndex(), Offset));
MemOps.push_back(Store);
Offset += 8;
}
if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
// Now store the XMM (fp + vector) parameter registers.
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getRegSaveFrameIndex(), dl));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getVarArgsFPOffset(), dl));
SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
LiveXMMRegs.end());
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other, SaveXMMOps));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
}
if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
// Find the largest legal vector type.
MVT VecVT = MVT::Other;
// FIXME: Only some x86_32 calling conventions support AVX512.
if (Subtarget.hasAVX512() &&
(Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
CallConv == CallingConv::Intel_OCL_BI)))
VecVT = MVT::v16f32;
else if (Subtarget.hasAVX())
VecVT = MVT::v8f32;
else if (Subtarget.hasSSE2())
VecVT = MVT::v4f32;
// We forward some GPRs and some vector types.
SmallVector<MVT, 2> RegParmTypes;
MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
RegParmTypes.push_back(IntVT);
if (VecVT != MVT::Other)
RegParmTypes.push_back(VecVT);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
// Conservatively forward AL on x86_64, since it might be used for varargs.
if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
}
// Copy all forwards from physical to virtual registers.
for (ForwardedRegister &F : Forwards) {
// FIXME: Can we use a less constrained schedule?
SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
}
}
// Some CCs need callee pop.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
// X86 interrupts must pop the error code (and the alignment padding) if
// present.
FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget.getTargetTriple().isOSMSVCRT() &&
argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
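// 0xAAAAAAA is a deliberately bogus sentinel; it should never be used as a
// real frame index on 32-bit targets.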
if (CallConv == CallingConv::X86_FastCall ||
CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
FuncInfo->setArgumentStackSize(StackSize);
if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
if (Personality == EHPersonality::CoreCLR) {
assert(Is64Bit);
// TODO: Add a mechanism to frame lowering that will allow us to indicate
// that we'd prefer this slot be allocated towards the bottom of the frame
// (i.e. near the stack pointer after allocating the frame). Every
// funclet needs a copy of this slot in its (mostly empty) frame, and the
// offset from the bottom of this and each funclet's frame must be the
// same, so the size of funclets' (mostly empty) frames is dictated by
// how far this slot is from the bottom (since they allocate just enough
// space to accommodate holding this slot at the correct offset).
int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
if (CallConv == CallingConv::X86_RegCall ||
Fn->hasFnAttribute("no_caller_saved_registers")) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
}
return Chain;
}
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
SDValue Arg, const SDLoc &dl,
SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
return DAG.getStore(
Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
}
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
bool Is64Bit, int FPDiff, const SDLoc &dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy(DAG.getDataLayout());
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
return SDValue(OutRetAddr.getNode(), 1);
}
/// Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
EVT PtrVT, unsigned SlotSize,
int FPDiff, const SDLoc &dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int NewReturnAddrFI =
MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), NewReturnAddrFI));
return Chain;
}
/// Returns a vector_shuffle mask for a movs{s|d} or movd
/// operation of the specified width.
static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
Mask.push_back(NumElems);
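// Mask indices >= NumElems select from V2, so lane 0 comes from V2 and the
// remaining lanes are taken unchanged from V1.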
for (unsigned i = 1; i != NumElems; ++i)
Mask.push_back(i);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
bool &isTailCall = CLI.IsTailCall;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
const CallInst *CI =
CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
(Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
if (CallConv == CallingConv::X86_INTR)
report_fatal_error("X86 interrupts may not be called directly");
if (Attr.getValueAsString() == "true")
isTailCall = false;
if (Subtarget.isPICStyleGOT() &&
!MF.getTarget().Options.GuaranteedTailCallOpt) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
// that requires lazy function symbol resolution. Using musttail or
// GuaranteedTailCallOpt will override this.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (!G || (!G->getGlobal()->hasLocalLinkage() &&
G->getGlobal()->hasDefaultVisibility()))
isTailCall = false;
}
bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
if (IsMustTail) {
// Force this to be a tail call. The verifier rules are enough to ensure
// that we can lower this successfully without moving the return address
// around.
isTailCall = true;
} else if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64.
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeArguments(Outs, CC_X86);
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
if (CallingConv::X86_VectorCall == CallConv) {
CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
if (IsSibcall)
// This is a sibcall. The memory operands are already in place in the
// caller's own incoming argument area, set up by the caller's caller.
NumBytes = 0;
else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall && !IsSibcall && !IsMustTail) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
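// FPDiff is negative when the callee needs more argument space than the
// caller provides, i.e. the return address must move down the stack.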
// Record the delta of movement of the return address stack slot, keeping
// the most negative (i.e. largest) adjustment seen so far.
if (FPDiff < X86Info->getTCReturnAddrDelta())
X86Info->setTCReturnAddrDelta(FPDiff);
}
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;
// If we have an inalloca argument, all stack space has already been allocated
// for us and is right at the top of the stack. We don't support multiple
// arguments passed in memory when using inalloca.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
if (!ArgLocs.back().isMemLoc())
report_fatal_error("cannot use inalloca attribute on a register "
"parameter");
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
}
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
NumBytes - NumBytesToPush, dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// The next loop assumes that the locations are in the same order as the
// input arguments.
assert(isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering");
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
if (Flags.isInAlloca())
continue;
CCValAssign &VA = ArgLocs[I];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[OutIndex];
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
if (Arg.getValueType().isVector() &&
Arg.getValueType().getVectorElementType() == MVT::i1)
Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getBitcast(MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(
Chain, dl, Arg, SpillSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
Arg = SpillSlot;
break;
}
}
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
// Split v64i1 value into two registers
Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
Subtarget);
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
if (isVarArg && IsWin64) {
// The Win64 ABI requires an argument XMM register to be copied to the
// corresponding shadow register if the callee is a varargs function.
unsigned ShadowReg = 0;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
}
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
if (Subtarget.isPICStyleGOT()) {
// ELF / PIC requires the GOT to be in the EBX register before function calls
// via the PLT GOT pointer.
if (!isTailCall) {
RegsToPass.push_back(std::make_pair(
unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()))));
} else {
// If we are tail calling and generating PIC/GOT style code, load the
// address of the callee into ECX. The value in ECX is used as the target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasLocalLinkage() &&
G->getGlobal()->hasDefaultVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as a hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget.hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
DAG.getConstant(NumXMMRegs, dl,
MVT::i8)));
}
if (isVarArg && IsMustTail) {
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
}
}
// For tail calls, lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
if (!IsSibcall && isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutsIndex) {
CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert((CallConv == CallingConv::X86_RegCall) &&
"Expecting custom case only in regcall calling convention");
// This means that we are in a special case where one argument was
// passed through two register locations - skip the next location.
++I;
}
continue;
}
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
// Skip inalloca arguments. They don't require any work.
if (Flags.isInAlloca())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
MemOpChains2.push_back(DAG.getStore(
ArgChain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
getPointerTy(DAG.getDataLayout()),
RegInfo->getSlotSize(), FPDiff, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (Callee->getOpcode() == ISD::GlobalAddress) {
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
// We should use an extra load for direct calls to dllimported functions in
// non-JIT mode.
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportStorageClass()) {
unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
Callee = DAG.getTargetGlobalAddress(
GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
if (OpFlags == X86II::MO_GOTPCREL) {
// Add a wrapper.
Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
getPointerTy(DAG.getDataLayout()), Callee);
// Add extra indirection
Callee = DAG.getLoad(
getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
unsigned char OpFlags =
Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
Callee = DAG.getTargetExternalSymbol(
S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
} else if (Subtarget.isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
Ops.push_back(Chain);
Ops.push_back(Callee);
if (isTailCall)
Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
// If HasNCSR is set (the no_caller_saved_registers attribute is present),
// we use the X86_INTR calling convention because it has the same CSR mask
// (same preserved registers).
const uint32_t *Mask = RegInfo->getCallPreservedMask(
MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
// If this is an invoke in a 32-bit function using a funclet-based
// personality, assume the function clobbers all registers. If an exception
// is thrown, the runtime will not restore CSRs.
// FIXME: Model this more precisely so that we can register allocate across
// the normal edge and spill and fill across the exceptional edge.
if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
const Function *CallerFn = MF.getFunction();
EHPersonality Pers =
CallerFn->hasPersonalityFn()
? classifyEHPersonality(CallerFn->getPersonalityFn())
: EHPersonality::Unknown;
if (isFuncletEHPersonality(Pers))
Mask = RegInfo->getNoPreservedMask();
}
// Define a new register mask from the existing mask.
uint32_t *RegMask = nullptr;
// In some calling conventions we need to remove the used physical registers
// from the reg mask.
if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Allocate a new Reg Mask and copy Mask.
RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
// Make sure all sub registers of the argument registers are reset
// in the RegMask.
for (auto const &RegPair : RegsToPass)
for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
// Create the RegMask Operand according to our updated mask.
Ops.push_back(DAG.getRegisterMask(RegMask));
} else {
// Create the RegMask Operand according to the static mask.
Ops.push_back(DAG.getRegisterMask(Mask));
}
if (InFlag.getNode())
Ops.push_back(InFlag);
if (isTailCall) {
// We used to do:
//// If this is the first return lowered for this function, add the regs
//// to the liveout set for the function.
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
MF.getFrameInfo().setHasTailCall();
return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget.getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
// For MSVC Win32 targets, the caller pops the hidden struct pointer.
NumBytesForCalleeToPop = 4;
else
NumBytesForCalleeToPop = 0; // Callee pops nothing.
if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
// No need to reset the stack after the call if the call doesn't return. To
// keep the MI verifier happy, we'll pretend the callee does it for us.
NumBytesForCalleeToPop = NumBytes;
}
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, dl, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
true),
InFlag, dl);
InFlag = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
InVals, RegMask);
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like std call, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On the X86_64 architecture with GOT-style position independent code, only
// local (within module) calls are supported at the moment.
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's
// dyld, for example.)
// If the tail-called function (the callee) has more arguments than the
// caller, the caller needs to make sure that there is room to move the
// RETADDR to. This is achieved by reserving an area the size of the argument
// delta right after the original RETADDR, but before the saved frame pointer
// or the spilled registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
/// Round the stack size up so that it is aligned, e.g. to 16n + 12 for a
/// 16-byte alignment requirement (leaving room for the return address slot).
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
unsigned SlotSize = RegInfo->getSlotSize();
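// We want (Offset + SlotSize) to be a multiple of StackAlignment, e.g.
// Offset % 16 == 12 for a 16-byte alignment with a 4-byte slot, so the stack
// is aligned again once the return address has been pushed.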
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// The misalignment is at most StackAlignment - SlotSize (e.g. 12), so just
// add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
} else {
// Mask out the lower bits, then add the stack alignment once plus the
// remainder (e.g. 12 bytes).
Offset = ((~AlignMask) & Offset) + StackAlignment +
(StackAlignment-SlotSize);
}
return Offset;
}
/// Return true if the given stack call argument is already available in the
/// same (relative) position of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII, const CCValAssign &VA) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
for (;;) {
// Look through nodes that don't alter the bits of the incoming value.
unsigned Op = Arg.getOpcode();
if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
Arg = Arg.getOperand(0);
continue;
}
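// A truncate whose input is an AssertZext to the truncated type leaves the
// relevant bits untouched as well, so look through that pair too.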
if (Op == ISD::TRUNCATE) {
const SDValue &TruncInput = Arg.getOperand(0);
if (TruncInput.getOpcode() == ISD::AssertZext &&
cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
Arg.getValueType()) {
Arg = TruncInput.getOperand(0);
continue;
}
}
break;
}
int FI = INT_MAX;
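// INT_MAX is a placeholder; every path that reaches the assert below must
// have assigned a real frame index first.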
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
unsigned Opcode = Def->getOpcode();
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
Opcode == X86::LEA64_32r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
} else
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
FI = FINode->getIndex();
Bytes = Flags.getByValSize();
} else
return false;
assert(FI != INT_MAX);
if (!MFI.isFixedObjectIndex(FI))
return false;
if (Offset != MFI.getObjectOffset(FI))
return false;
// If this is not byval, check that the argument stack object is immutable.
// inalloca and argument copy elision can create mutable argument stack
// objects. Byval objects can be mutated, but a byval call intends to pass the
// mutated memory.
if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
return false;
if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
// If the argument location is wider than the argument type, check that any
// extension flags match.
if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
Flags.isSExt() != MFI.isObjectSExt(FI)) {
return false;
}
}
return Bytes == MFI.getObjectSize(FI);
}
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
// then the FP_EXTEND of the call result is not a nop. It's not safe to
// perform a tailcall optimization here.
if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
return false;
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
// space.
if (IsCalleeWin64 != IsCallerWin64)
return false;
if (DAG.getTarget().Options.GuaranteedTailCallOpt)
return canGuaranteeTCO(CalleeCC) && CCMatch;
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->needsStackRealignment(MF))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
LLVMContext &C = *DAG.getContext();
if (isVarArg && !Outs.empty()) {
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
if (IsCalleeWin64 || IsCallerWin64)
return false;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
}
// If the call result is in ST0 / ST1, it needs to be popped off the x87
// stack. Therefore, if it's not used by the call it is not safe to optimize
// this into a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
Unused = true;
break;
}
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
}
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
unsigned StackArgsSize = 0;
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
// Allocate shadow area for Win64
if (IsCalleeWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
StackArgsSize = CCInfo.getNextStackOffset();
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII, VA))
return false;
}
}
}
bool PositionIndependent = isPositionIndependent();
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
PositionIndependent)) {
unsigned NumInRegs = 0;
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs = PositionIndependent ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
unsigned Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
if (++NumInRegs == MaxInRegs)
return false;
break;
}
}
}
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
}
bool CalleeWillPop =
X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt);
if (unsigned BytesToPop =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
// If we have bytes to pop, the callee must pop them.
bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
if (!CalleePopMatches)
return false;
} else if (CalleeWillPop && StackArgsSize > 0) {
// If we don't have bytes to pop, make sure the callee doesn't pop any.
return false;
}
return true;
}
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return X86::createFastISel(funcInfo, libInfo);
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
static bool MayFoldLoad(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
}
static bool MayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
static bool MayFoldIntoZeroExtend(SDValue Op) {
if (Op.hasOneUse()) {
unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
return (ISD::ZERO_EXTEND == Opcode);
}
return false;
}
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
case X86ISD::MOVLPS:
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VBROADCAST:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::VPERMIL2:
case X86ISD::VPERMI:
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIV3:
case X86ISD::VZEXT_MOVL:
return true;
}
}
static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
default: return false;
// Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
case X86ISD::VPERMIL2:
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIV3:
return true;
// 'Faux' Target Shuffles.
case ISD::AND:
case X86ISD::ANDNP:
return true;
}
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
-(int64_t)SlotSize,
false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
// restrictions.
if (!hasSymbolicDisplacement)
return true;
// FIXME: Some tweaks might be needed for medium code model.
if (M != CodeModel::Small && M != CodeModel::Kernel)
return false;
// For the small code model, we assume that the last object is 16MB before the
// end of the 31-bit boundary. We may also accept pretty large negative
// constants, knowing that all objects are in the positive half of the address
// space.
if (M == CodeModel::Small && Offset < 16*1024*1024)
return true;
// For the kernel code model we know that all objects reside in the negative
// half of the 32-bit address space. We must not accept negative offsets, since
// they may be just off, but we may accept pretty large positive ones.
if (M == CodeModel::Kernel && Offset >= 0)
return true;
return false;
}
/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
// If GuaranteeTCO is true, we force some calls to be callee pop so that we
// can guarantee TCO.
if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
return true;
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::X86_VectorCall:
return !is64Bit;
}
}
/// \brief Return true if the condition is an unsigned comparison operation.
static bool isX86CCUnsigned(unsigned X86CC) {
switch (X86CC) {
default:
llvm_unreachable("Invalid integer condition!");
case X86::COND_E:
case X86::COND_NE:
case X86::COND_B:
case X86::COND_A:
case X86::COND_BE:
case X86::COND_AE:
return true;
case X86::COND_G:
case X86::COND_GE:
case X86::COND_L:
case X86::COND_LE:
return false;
}
}
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
switch (SetCCOpcode) {
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;
case ISD::SETNE: return X86::COND_NE;
case ISD::SETULT: return X86::COND_B;
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
}
}
/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
bool isFP, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_LE;
}
}
return TranslateIntegerX86CC(SetCCOpcode);
}
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
if (ISD::isNON_EXTLoad(LHS.getNode()) &&
!ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
std::swap(LHS, RHS);
break;
}
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
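// For example, after the swap above SETOLT is evaluated as "Y > X", so it
// maps to the unsigned-above condition COND_A (marked 'flipped' below).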
switch (SetCCOpcode) {
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
case ISD::SETOGT:
case ISD::SETGT: return X86::COND_A;
case ISD::SETOLE: // flipped
case ISD::SETOGE:
case ISD::SETGE: return X86::COND_AE;
case ISD::SETUGT: // flipped
case ISD::SETULT:
case ISD::SETLT: return X86::COND_B;
case ISD::SETUGE: // flipped
case ISD::SETULE:
case ISD::SETLE: return X86::COND_BE;
case ISD::SETONE:
case ISD::SETNE: return X86::COND_NE;
case ISD::SETUO: return X86::COND_P;
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
}
}
/// Is there a floating point cmov for the specific X86 condition code?
/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
default:
return false;
case X86::COND_B:
case X86::COND_BE:
case X86::COND_E:
case X86::COND_P:
case X86::COND_A:
case X86::COND_AE:
case X86::COND_NE:
case X86::COND_NP:
return true;
}
}
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
if (!IntrData)
return false;
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.readMem = false;
Info.writeMem = false;
Info.vol = false;
Info.offset = 0;
switch (IntrData->Type) {
case EXPAND_FROM_MEM: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getType());
Info.align = 1;
Info.readMem = true;
break;
}
case COMPRESS_TO_MEM: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
Info.align = 1;
Info.writeMem = true;
break;
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
Info.ptrVal = I.getArgOperand(0);
MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
ScalarVT = MVT::i8;
else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
ScalarVT = MVT::i16;
else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
ScalarVT = MVT::i32;
Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
Info.align = 1;
Info.writeMem = true;
break;
}
default:
return false;
}
return true;
}
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
return true;
}
return false;
}
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
// relocation target a movq or addq instruction: don't let the load shrink.
SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
return true;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0 || BitSize > 64)
return false;
return true;
}
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
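// Extracts at index 0 (a subregister read), or at an offset of exactly one
// result width (e.g. the upper half of a double-width vector), are cheap.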
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
bool X86TargetLowering::isCheapToSpeculateCttz() const {
// Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI();
}
bool X86TargetLowering::isCheapToSpeculateCtlz() const {
// Speculate ctlz only if we can directly use LZCNT.
return Subtarget.hasLZCNT();
}
bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
return true;
}
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (!Subtarget.hasBMI())
return false;
// There are only 32-bit and 64-bit forms for 'andn'.
EVT VT = Y.getValueType();
if (VT != MVT::i32 && VT != MVT::i64)
return false;
return true;
}
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
MVT VT = MVT::getIntegerVT(NumBits);
if (isTypeLegal(VT))
return VT;
// PMOVMSKB can handle this.
if (NumBits == 128 && isTypeLegal(MVT::v16i8))
return MVT::v16i8;
// VPMOVMSKB can handle this.
if (NumBits == 256 && isTypeLegal(MVT::v32i8))
return MVT::v32i8;
// TODO: Allow 64-bit type for 32-bit target.
// TODO: 512-bit types should be allowed, but make sure that those
// cases are handled in combineVectorSizedSetCCEquality().
return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
/// Val is the undef sentinel value or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return ((Val == SM_SentinelUndef) || (Val == CmpVal));
}
/// Val is either the undef or zero sentinel value.
static bool isUndefOrZero(int Val) {
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is the undef sentinel value.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
if (Mask[i] != SM_SentinelUndef)
return false;
return true;
}
/// Return true if Val is undef or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
}
/// Return true if every element in Mask is undef or if its value
/// falls within the specified range [Low, Hi).
static bool isUndefOrInRange(ArrayRef<int> Mask,
int Low, int Hi) {
for (int M : Mask)
if (!isUndefOrInRange(M, Low, Hi))
return false;
return true;
}
/// Return true if Val is undef, zero or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
}
/// Return true if every element in Mask is undef, zero or if its value
/// falls within the specified range [Low, Hi).
static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
for (int M : Mask)
if (!isUndefOrZeroOrInRange(M, Low, Hi))
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
unsigned Pos, unsigned Size, int Low) {
for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
if (!isUndefOrEqual(Mask[i], Low))
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef or zero.
static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size, int Low) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
return false;
return true;
}
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is undef or is zero.
static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
if (!isUndefOrZero(Mask[i]))
return false;
return true;
}
/// \brief Helper function to test whether a shuffle mask could be
/// simplified by widening the elements being shuffled.
///
/// Writes the mask for wider elements to WidenedMask if valid. Otherwise
/// leaves it in an unspecified state.
///
/// NOTE: This must handle normal vector shuffle masks and *target* vector
/// shuffle masks. The latter have the special property of a '-2' representing
/// a zeroed lane of a vector.
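/// For example, <0,1,6,7> widens to <0,3>, while <0,2,1,3> cannot be
/// widened.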
static bool canWidenShuffleElements(ArrayRef<int> Mask,
SmallVectorImpl<int> &WidenedMask) {
WidenedMask.assign(Mask.size() / 2, 0);
for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
int M0 = Mask[i];
int M1 = Mask[i + 1];
// If both elements are undef, it's trivial.
if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
WidenedMask[i / 2] = SM_SentinelUndef;
continue;
}
// Check for an undef mask and a mask value properly aligned to fit with
// a pair of values. If we find such a case, use the non-undef mask's value.
if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
WidenedMask[i / 2] = M1 / 2;
continue;
}
if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
WidenedMask[i / 2] = M0 / 2;
continue;
}
// When zeroing, we need to spread the zeroing across both lanes to widen.
if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
(M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
WidenedMask[i / 2] = SM_SentinelZero;
continue;
}
return false;
}
// Finally check if the two mask values are adjacent and aligned with
// a pair.
if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
WidenedMask[i / 2] = M0 / 2;
continue;
}
// Otherwise we can't safely widen the elements used in this shuffle.
return false;
}
assert(WidenedMask.size() == Mask.size() / 2 &&
"Incorrect size of mask after widening the elements!");
return true;
}
/// Helper function to scale a shuffle or target shuffle mask, replacing each
/// mask index with the scaled sequential indices for an equivalent narrowed
/// mask. This is the reverse process to canWidenShuffleElements, but can always
/// succeed.
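/// e.g. scaling <1,u,0> (u == SM_SentinelUndef) by 2 gives <2,3,u,u,0,1>.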
static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
int NumElts = Mask.size();
ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
// Repeat sentinel values in every mask element.
if (M < 0) {
for (int s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = M;
continue;
}
// Scale mask element and increment across each mask element.
for (int s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = (Scale * M) + s;
}
}
/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
/// extract that is suitable for instructions that extract 128- or 256-bit
/// vectors.
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
return false;
// The index should be aligned on a vecWidth-bit boundary.
uint64_t Index = N->getConstantOperandVal(1);
MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getScalarSizeInBits();
return (Index * ElSize) % vecWidth == 0;
}
/// Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for the insertion
/// of 128- or 256-bit subvectors.
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
return false;
// The index should be aligned on a vecWidth-bit boundary.
uint64_t Index = N->getConstantOperandVal(2);
MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getScalarSizeInBits();
return (Index * ElSize) % vecWidth == 0;
}
bool X86::isVINSERT128Index(SDNode *N) {
return isVINSERTIndex(N, 128);
}
bool X86::isVINSERT256Index(SDNode *N) {
return isVINSERTIndex(N, 256);
}
bool X86::isVEXTRACT128Index(SDNode *N) {
return isVEXTRACTIndex(N, 128);
}
bool X86::isVEXTRACT256Index(SDNode *N) {
return isVEXTRACTIndex(N, 256);
}
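// For example, extracting the upper 128-bit half of a v8i32 (element index
// 4, 4 elements per 128-bit chunk) yields the immediate 4 / 4 == 1.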
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
"Illegal extract subvector for VEXTRACT");
uint64_t Index = N->getConstantOperandVal(1);
MVT VecVT = N->getOperand(0).getSimpleValueType();
unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
return Index / NumElemsPerChunk;
}
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
"Illegal insert subvector for VINSERT");
uint64_t Index = N->getConstantOperandVal(2);
MVT VecVT = N->getSimpleValueType(0);
unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
return Index / NumElemsPerChunk;
}
/// Return the appropriate immediate to extract the specified
/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VEXTRACTI128 instructions.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 128);
}
/// Return the appropriate immediate to extract the specified
/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VEXTRACTI64x4 instructions.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 256);
}
/// Return the appropriate immediate to insert at the specified
/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 128);
}
/// Return the appropriate immediate to insert at the specified
/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
return isNullConstant(Elt) || isNullFPConstant(Elt);
}
// Build a vector of constants.
// Use an UNDEF node if MaskElt == -1.
// Split 64-bit constants into 32-bit halves in 32-bit mode.
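// In the split case each 64-bit element is emitted as its low 32 bits
// followed by an explicit zero upper half, and the resulting <2*N x i32>
// vector is bitcast back to VT.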
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
const SDLoc &dl, bool IsMask = false) {
SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
Split = true;
}
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
bool IsUndef = Values[i] < 0 && IsMask;
SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
DAG.getConstant(Values[i], dl, EltVT);
Ops.push_back(OpNode);
if (Split)
Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
DAG.getConstant(0, dl, EltVT));
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
if (Split)
ConstsNode = DAG.getBitcast(VT, ConstsNode);
return ConstsNode;
}
static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(Bits.size() == Undefs.getBitWidth() &&
"Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
bool Split = false;
MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
Split = true;
}
MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
if (Undefs[i]) {
Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
continue;
}
const APInt &V = Bits[i];
assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
if (Split) {
Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
} else if (EltVT == MVT::f32) {
APFloat FV(APFloat::IEEEsingle(), V);
Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
} else if (EltVT == MVT::f64) {
APFloat FV(APFloat::IEEEdouble(), V);
Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
} else {
Ops.push_back(DAG.getConstant(V, dl, EltVT));
}
}
SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
return DAG.getBitcast(VT, ConstsNode);
}
/// Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
VT.getVectorElementType() == MVT::i1) &&
"Unexpected vector type");
// Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
// type. This ensures they get CSE'd. But if the integer type is not
// available, use a floating-point +0.0 instead.
SDValue Vec;
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
} else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type");
assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type");
Vec = DAG.getConstant(0, dl, VT);
} else {
unsigned Num32BitElts = VT.getSizeInBits() / 32;
Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
}
return DAG.getBitcast(VT, Vec);
}
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
const SDLoc &dl, unsigned vectorWidth) {
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/vectorWidth;
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements()/Factor);
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want. Since ElemsPerChunk is a power of 2 we just need to clear the
// low bits.
IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(
ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
/// Generate a DAG to grab 128 bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert((Vec.getValueType().is256BitVector() ||
Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 128);
}
/// Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
return extractSubVector(Vec, IdxVal, DAG, dl, 256);
}
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl,
unsigned vectorWidth) {
assert((vectorWidth == 128 || vectorWidth == 256) &&
"Unsupported vector width");
// Inserting an UNDEF subvector is a no-op; return Result unchanged.
if (Vec.isUndef())
return Result;
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
EVT ResultVT = Result.getValueType();
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want. Since ElemsPerChunk is a power of 2 we just need to clear the
// low bits.
IdxVal &= ~(ElemsPerChunk - 1);
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
/// Generate a DAG to put 128 bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
// Return true if the instruction zeroes the unused upper part of the
// destination and accepts a mask.
static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
switch (Opcode) {
default:
return false;
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
return true;
}
}
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue SubVec = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
if (!isa<ConstantSDNode>(Idx))
return SDValue();
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
return Op;
MVT OpVT = Op.getSimpleValueType();
MVT SubVecVT = SubVec.getSimpleValueType();
unsigned NumElems = OpVT.getVectorNumElements();
unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
assert(IdxVal + SubVecNumElems <= NumElems &&
IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR");
// There are 3 possible cases:
// 1. Subvector should be inserted in the lower part (IdxVal == 0)
// 2. Subvector should be inserted in the upper part
// (IdxVal + SubVecNumElems == NumElems)
// 3. Subvector should be inserted in the middle (for example v2i1
// to v16i1, index 2)
// If this node widens - by concatenating zeroes - the type of the result
// of a node whose instruction zeroes all upper (irrelevant) bits of the
// output register, mark this node as legal to enable replacing them with
// the v8i1 version of the previous instruction during instruction selection.
// For example, the VPCMPEQDZ128rr instruction stores its v4i1 result in a
// k-register while zeroing all the upper remaining 60 bits of the register.
// If the result of such an instruction is inserted into an all-zeros vector,
// then we can safely remove the insert_subvector (in instruction selection)
// as the cmp instruction already zeroed the rest of the register.
if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 &&
(isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) ||
(SubVec.getOpcode() == ISD::AND &&
(isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) ||
isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode())))))
return Op;
// extend to natively supported kshift
MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
MVT WideOpVT = OpVT;
if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
WideOpVT = MinVT;
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Undef = DAG.getUNDEF(WideOpVT);
SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
Undef, SubVec, ZeroIdx);
// Extract a subvector if required.
auto ExtractSubVec = [&](SDValue V) {
return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
OpVT, V, ZeroIdx);
};
if (Vec.isUndef()) {
if (IdxVal != 0) {
SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
ShiftBits);
}
return ExtractSubVec(WideSubVec);
}
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(ShiftLeft, dl, MVT::i8));
Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
return ExtractSubVec(Vec);
}
if (IdxVal == 0) {
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
getZeroVector(WideOpVT, Subtarget, DAG, dl),
SubVec, ZeroIdx);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
// Simple case: the subvector goes in the upper part.
if (IdxVal + SubVecNumElems == NumElems) {
// Zero upper bits of the Vec
WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
// Subvector should be inserted in the middle - use shuffle
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
SubVec, ZeroIdx);
SmallVector<int, 64> Mask;
for (unsigned i = 0; i < NumElems; ++i)
Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
i : i + NumElems);
return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
}
/// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
/// large BUILD_VECTORS.
static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
const SDLoc &dl) {
SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
}
static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
unsigned NumElems, SelectionDAG &DAG,
const SDLoc &dl) {
SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
}
/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
/// Then bitcast to their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
APInt Ones = APInt::getAllOnesValue(32);
unsigned NumElts = VT.getSizeInBits() / 32;
SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
}
static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
SelectionDAG &DAG) {
EVT InVT = In.getValueType();
assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
if (VT.is128BitVector() && InVT.is128BitVector())
return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
: DAG.getZeroExtendVectorInReg(In, DL, VT);
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
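// e.g. a VZEXT from v16i16 to v8i32 reads only the low v8i16 half of the
// input, so extract those 128 bits before creating the extend node.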
if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
In = extractSubVector(In, 0, DAG, DL,
std::max(128, (int)VT.getSizeInBits() / Scale));
}
return DAG.getNode(Opc, DL, VT, In);
}
/// Generate unpacklo/unpackhi shuffle mask.
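/// e.g. for v8i16, unpacklo yields <0,8,1,9,2,10,3,11> and unpackhi yields
/// <4,12,5,13,6,14,7,15>.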
static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
bool Unary) {
assert(Mask.empty() && "Expected an empty shuffle mask vector");
int NumElts = VT.getVectorNumElements();
int NumEltsInLane = 128 / VT.getScalarSizeInBits();
for (int i = 0; i < NumElts; ++i) {
unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
int Pos = (i % NumEltsInLane) / 2 + LaneStart;
Pos += (Unary ? 0 : NumElts * (i % 2));
Pos += (Lo ? 0 : NumEltsInLane / 2);
Mask.push_back(Pos);
}
}
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
/// Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
SmallVector<int, 8> Mask;
createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
/// Return a vector_shuffle of the specified vector and a zero or undef
/// vector.
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
bool IsZero,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
int NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec(NumElems);
for (int i = 0; i != NumElems; ++i)
// If this is the insertion idx, put the low elt of V2 here.
MaskVec[i] = (i == Idx) ? NumElems : i;
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
static SDValue peekThroughBitcasts(SDValue V) {
while (V.getNode() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
return V;
}
static SDValue peekThroughOneUseBitcasts(SDValue V) {
while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
V.getOperand(0).hasOneUse())
V = V.getOperand(0);
return V;
}
static const Constant *getTargetConstantFromNode(SDValue Op) {
Op = peekThroughBitcasts(Op);
auto *Load = dyn_cast<LoadSDNode>(Op);
if (!Load)
return nullptr;
SDValue Ptr = Load->getBasePtr();
if (Ptr->getOpcode() == X86ISD::Wrapper ||
Ptr->getOpcode() == X86ISD::WrapperRIP)
Ptr = Ptr->getOperand(0);
auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
if (!CNode || CNode->isMachineConstantPoolEntry())
return nullptr;
return dyn_cast<Constant>(CNode->getConstVal());
}
// Extract raw constant bits from constant pools.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt &UndefElts,
SmallVectorImpl<APInt> &EltBits,
bool AllowWholeUndefs = true,
bool AllowPartialUndefs = true) {
assert(EltBits.empty() && "Expected an empty EltBits vector");
Op = peekThroughBitcasts(Op);
EVT VT = Op.getValueType();
unsigned SizeInBits = VT.getSizeInBits();
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits;
// Bitcast a source array of element bits to the target size.
auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
unsigned NumSrcElts = UndefSrcElts.getBitWidth();
unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
"Constant bit sizes don't match");
// Don't split if we don't allow undef bits.
bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
if (UndefSrcElts.getBoolValue() && !AllowUndefs)
return false;
// If we're already the right size, don't bother bitcasting.
if (NumSrcElts == NumElts) {
UndefElts = UndefSrcElts;
EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
return true;
}
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(SizeInBits, 0);
APInt MaskBits(SizeInBits, 0);
for (unsigned i = 0; i != NumSrcElts; ++i) {
unsigned BitOffset = i * SrcEltSizeInBits;
if (UndefSrcElts[i])
UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
MaskBits.insertBits(SrcEltBits[i], BitOffset);
}
// Split the undef/constant single bitset data into the target elements.
UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
for (unsigned i = 0; i != NumElts; ++i) {
unsigned BitOffset = i * EltSizeInBits;
APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
// Only treat an element as UNDEF if all bits are UNDEF.
if (UndefEltBits.isAllOnesValue()) {
if (!AllowWholeUndefs)
return false;
UndefElts.setBit(i);
continue;
}
// If only some bits are UNDEF then treat them as zero (or bail if not
// supported).
if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
return false;
APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
EltBits[i] = Bits.getZExtValue();
}
return true;
};
// Collect constant bits and insert into mask/undef bit masks.
auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
unsigned UndefBitIndex) {
if (!Cst)
return false;
if (isa<UndefValue>(Cst)) {
Undefs.setBit(UndefBitIndex);
return true;
}
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
Mask = CInt->getValue();
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
Mask = CFP->getValueAPF().bitcastToAPInt();
return true;
}
return false;
};
// Extract constant bits from build vector.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
const SDValue &Src = Op.getOperand(i);
if (Src.isUndef()) {
UndefSrcElts.setBit(i);
continue;
}
auto *Cst = cast<ConstantSDNode>(Src);
SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
}
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
return false;
unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumSrcElts = CstTy->getVectorNumElements();
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
for (unsigned i = 0; i != NumSrcElts; ++i)
if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
UndefSrcElts, i))
return false;
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST &&
EltSizeInBits <= VT.getScalarSizeInBits()) {
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
if (UndefSrcElts[0])
UndefSrcElts.setBits(0, NumSrcElts);
SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
return CastBitData(UndefSrcElts, SrcEltBits);
}
}
}
// Extract a rematerialized scalar constant insertion.
if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits;
auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
return false;
}
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
// Extract the raw target constant bits.
// FIXME: We currently don't support UNDEF bits or mask entries.
if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
EltBits, /* AllowWholeUndefs */ false,
/* AllowPartialUndefs */ false))
return false;
// Insert the extracted elements into the mask.
for (APInt Elt : EltBits)
RawMask.push_back(Elt.getZExtValue());
return true;
}
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
/// Sets \p IsUnary to true if only one source is used. Note that this will set
/// IsUnary for shuffles which use a single input multiple times, and in those
/// cases it will adjust the mask to only have indices within that single input.
/// It is an error to call this with non-empty Mask/Ops vectors.
static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<SDValue> &Ops,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector");
assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector");
IsUnary = false;
bool IsFakeUnary = false;
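// IsFakeUnary is set when both shuffle inputs are the same node; in that
// case the mask is remapped after the switch to reference only the first
// input.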
switch(N->getOpcode()) {
case X86ISD::BLENDI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::INSERTPS:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::EXTRQI:
if (isa<ConstantSDNode>(N->getOperand(1)) &&
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
case X86ISD::INSERTQI:
if (isa<ConstantSDNode>(N->getOperand(2)) &&
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::PALIGNR:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(0));
break;
case X86ISD::VSHLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::VSRLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands() - 1);
DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::VZEXT_MOVL:
DecodeZeroMoveLowMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::VBROADCAST: {
SDValue N0 = N->getOperand(0);
// See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
// add the pre-extracted value to the Ops vector.
if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N0.getOperand(0).getValueType() == VT &&
N0.getConstantOperandVal(1) == 0)
Ops.push_back(N0.getOperand(0));
// We only decode broadcasts of same-sized vectors, unless the broadcast
// came from an extract of a vector of the original width. If we found one,
// we pushed it to the Ops vector above.
if (N0.getValueType() == VT || !Ops.empty()) {
DecodeVectorBroadcast(VT, Mask);
IsUnary = true;
break;
}
return false;
}
case X86ISD::VPERMILPV: {
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
unsigned MaskEltSize = VT.getScalarSizeInBits();
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMILPMask(VT, RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMILPMask(C, MaskEltSize, Mask);
break;
}
return false;
}
case X86ISD::PSHUFB: {
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
DecodePSHUFBMask(RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodePSHUFBMask(C, Mask);
break;
}
return false;
}
case X86ISD::VPERMI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD:
DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
break;
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVSHDUP:
DecodeMOVSHDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVDDUP:
DecodeMOVDDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
// Not yet implemented
return false;
case X86ISD::VPERMIL2: {
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
unsigned MaskEltSize = VT.getScalarSizeInBits();
SDValue MaskNode = N->getOperand(2);
SDValue CtrlNode = N->getOperand(3);
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
unsigned CtrlImm = CtrlOp->getZExtValue();
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
break;
}
}
return false;
}
case X86ISD::VPPERM: {
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
SDValue MaskNode = N->getOperand(2);
SmallVector<uint64_t, 32> RawMask;
if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
DecodeVPPERMMask(RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPPERMMask(C, Mask);
break;
}
return false;
}
case X86ISD::VPERMV: {
IsUnary = true;
// Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
Ops.push_back(N->getOperand(1));
SDValue MaskNode = N->getOperand(0);
SmallVector<uint64_t, 32> RawMask;
unsigned MaskEltSize = VT.getScalarSizeInBits();
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
DecodeVPERMVMask(RawMask, Mask);
break;
}
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMVMask(C, MaskEltSize, Mask);
break;
}
return false;
}
case X86ISD::VPERMV3: {
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
// Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
unsigned MaskEltSize = VT.getScalarSizeInBits();
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
break;
}
return false;
}
case X86ISD::VPERMIV3: {
IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
// Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
Ops.push_back(N->getOperand(1));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(0);
unsigned MaskEltSize = VT.getScalarSizeInBits();
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
break;
}
return false;
}
default: llvm_unreachable("unknown target shuffle node");
}
// Empty mask indicates the decode failed.
if (Mask.empty())
return false;
// Check if we're getting a shuffle mask with zero'd elements.
if (!AllowSentinelZero)
if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
if (IsFakeUnary)
for (int &M : Mask)
if (M >= (int)Mask.size())
M -= Mask.size();
// If we didn't already add operands in the opcode-specific code, default to
// adding 1 or 2 operands starting at 0.
if (Ops.empty()) {
Ops.push_back(N->getOperand(0));
if (!IsUnary || IsFakeUnary)
Ops.push_back(N->getOperand(1));
}
return true;
}
/// Check a target shuffle mask's inputs to see if we can set any values to
/// SM_SentinelZero - this is for elements that are known to be zero
/// (not just zeroable) from their inputs.
/// Returns true if the target shuffle mask was decoded.
static bool setTargetShuffleZeroElements(SDValue N,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops) {
bool IsUnary;
if (!isTargetShuffle(N.getOpcode()))
return false;
MVT VT = N.getSimpleValueType();
if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
return false;
SDValue V1 = Ops[0];
SDValue V2 = IsUnary ? V1 : Ops[1];
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
assert((VT.getSizeInBits() % Mask.size()) == 0 &&
"Illegal split of shuffle value type");
unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
// Extract known constant input data.
APInt UndefSrcElts[2];
SmallVector<APInt, 32> SrcEltBits[2];
bool IsSrcConstant[2] = {
getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
SrcEltBits[0], true, false),
getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
SrcEltBits[1], true, false)};
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
// Already decoded as SM_SentinelZero / SM_SentinelUndef.
if (M < 0)
continue;
// Determine shuffle input and normalize the mask.
unsigned SrcIdx = M / Size;
SDValue V = M < Size ? V1 : V2;
M %= Size;
// We are referencing an UNDEF input.
if (V.isUndef()) {
Mask[i] = SM_SentinelUndef;
continue;
}
// SCALAR_TO_VECTOR - only the first element is defined, and the rest are UNDEF.
// TODO: We currently only set UNDEF for integer types - floats use the same
// registers as vectors and many of the scalar folded loads rely on the
// SCALAR_TO_VECTOR pattern.
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Size % V.getValueType().getVectorNumElements()) == 0) {
int Scale = Size / V.getValueType().getVectorNumElements();
int Idx = M / Scale;
if (Idx != 0 && !VT.isFloatingPoint())
Mask[i] = SM_SentinelUndef;
else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
Mask[i] = SM_SentinelZero;
continue;
}
// Attempt to extract from the source's constant bits.
if (IsSrcConstant[SrcIdx]) {
if (UndefSrcElts[SrcIdx][M])
Mask[i] = SM_SentinelUndef;
else if (SrcEltBits[SrcIdx][M] == 0)
Mask[i] = SM_SentinelZero;
}
}
assert(VT.getVectorNumElements() == Mask.size() &&
"Different mask size from vector size!");
return true;
}
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements than the
// destination value type.
static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
SelectionDAG &DAG) {
Mask.clear();
Ops.clear();
MVT VT = N.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned NumSizeInBits = VT.getSizeInBits();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&
"Expected byte aligned value types");
unsigned Opcode = N.getOpcode();
switch (Opcode) {
case ISD::AND:
case X86ISD::ANDNP: {
// Attempt to decode as a per-byte mask.
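// Illustrative example (not from the original source): for a plain AND with
// a per-byte constant <0xFF,0x00,0xFF,0xFF,...>, each 0xFF byte passes its
// input byte through (mask element i) and each 0x00 byte is known zero, so
// the decoded mask is <0, SM_SentinelZero, 2, 3, ...>.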
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
bool IsAndN = (X86ISD::ANDNP == Opcode);
uint64_t ZeroMask = IsAndN ? 255 : 0;
if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
return false;
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
if (UndefElts[i]) {
Mask.push_back(SM_SentinelUndef);
continue;
}
uint64_t ByteBits = EltBits[i].getZExtValue();
if (ByteBits != 0 && ByteBits != 255)
return false;
Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
}
Ops.push_back(IsAndN ? N1 : N0);
return true;
}
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
// for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
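// Illustrative example: scalar_to_vector(extract_vector_elt(v4i32 V, 2)) of
// type v4i32 decodes to Ops = { V } with mask <2, u, u, u> (no zero padding
// is needed when the scalar and destination element sizes match).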
SDValue N0 = N.getOperand(0);
SDValue SrcExtract;
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
N0.getOperand(0).getValueType() == VT) {
SrcExtract = N0;
} else if (N0.getOpcode() == ISD::AssertZext &&
N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
SrcExtract = N0.getOperand(0);
assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
} else if (N0.getOpcode() == ISD::AssertZext &&
N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
SrcExtract = N0.getOperand(0);
assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
}
if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
return false;
SDValue SrcVec = SrcExtract.getOperand(0);
EVT SrcVT = SrcVec.getValueType();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
if (NumSrcElts <= SrcIdx)
return false;
Ops.push_back(SrcVec);
Mask.push_back(SrcIdx);
Mask.append(NumZeros, SM_SentinelZero);
Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
}
case X86ISD::PINSRB:
case X86ISD::PINSRW: {
SDValue InVec = N.getOperand(0);
SDValue InScl = N.getOperand(1);
uint64_t InIdx = N.getConstantOperandVal(2);
assert(InIdx < NumElts && "Illegal insertion index");
// Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
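// Illustrative example: PINSRW(V, 0, 2) on v8i16 decodes to Ops = { V } with
// mask <0, 1, SM_SentinelZero, 3, 4, 5, 6, 7>.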
if (X86::isZeroNode(InScl)) {
Ops.push_back(InVec);
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
return true;
}
// Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern.
// TODO: Expand this to support INSERT_VECTOR_ELT/etc.
unsigned ExOp =
(X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
if (InScl.getOpcode() != ISD::AssertZext ||
InScl.getOperand(0).getOpcode() != ExOp)
return false;
SDValue ExVec = InScl.getOperand(0).getOperand(0);
uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1);
assert(ExIdx < NumElts && "Illegal extraction index");
Ops.push_back(InVec);
Ops.push_back(ExVec);
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
case X86ISD::PACKSS: {
// If we know input saturation won't happen, we can treat this
// as a truncation shuffle.
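// Illustrative example: a v16i8 PACKSS of two v8i16 inputs whose values are
// already in i8 range simply takes every other byte, i.e. the byte-level
// mask <0, 2, 4, ..., 30> over the concatenated inputs.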
if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
return false;
Ops.push_back(N.getOperand(0));
Ops.push_back(N.getOperand(1));
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(i * 2);
return true;
}
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
uint64_t ShiftVal = N.getConstantOperandVal(1);
// Out of range bit shifts are guaranteed to be zero.
if (NumBitsPerElt <= ShiftVal) {
Mask.append(NumElts, SM_SentinelZero);
return true;
}
// We can only decode 'whole byte' bit shifts as shuffles.
if ((ShiftVal % 8) != 0)
break;
uint64_t ByteShift = ShiftVal / 8;
unsigned NumBytes = NumSizeInBits / 8;
unsigned NumBytesPerElt = NumBitsPerElt / 8;
Ops.push_back(N.getOperand(0));
// Clear mask to all zeros and insert the shifted byte indices.
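// Illustrative example: a v2i64 VSHLI by 16 bits (ByteShift == 2,
// NumBytesPerElt == 8) produces the per-byte mask
// <Z,Z,0,1,2,3,4,5, Z,Z,8,9,10,11,12,13>, where Z is SM_SentinelZero.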
Mask.append(NumBytes, SM_SentinelZero);
if (X86ISD::VSHLI == Opcode) {
for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j] = i + j - ByteShift;
} else {
for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
Mask[i + j - ByteShift] = i + j;
}
return true;
}
case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VZEXT: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
if (NumSizeInBits != SrcVT.getSizeInBits())
break;
DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
Ops.push_back(Src);
return true;
}
}
return false;
}
/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
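/// Illustrative example: with Inputs = { A, B } and a mask that only
/// references B's range [MaskWidth, 2*MaskWidth), A is dropped and every mask
/// element is rebased by -MaskWidth so the mask points into B alone.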
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask) {
int MaskWidth = Mask.size();
SmallVector<SDValue, 16> UsedInputs;
for (int i = 0, e = Inputs.size(); i < e; ++i) {
int lo = UsedInputs.size() * MaskWidth;
int hi = lo + MaskWidth;
if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
UsedInputs.push_back(Inputs[i]);
continue;
}
for (int &M : Mask)
if (lo <= M)
M -= MaskWidth;
}
Inputs = UsedInputs;
}
/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
/// remaining input indices in case we now have a unary shuffle and adjust the
/// inputs accordingly.
/// Returns true if the target shuffle mask was decoded.
static bool resolveTargetShuffleInputs(SDValue Op,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
SelectionDAG &DAG) {
if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
return true;
}
/// Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
SDValue V = SDValue(N, 0);
EVT VT = V.getValueType();
unsigned Opcode = V.getOpcode();
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
int Elt = SV->getMaskElt(Index);
if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
unsigned NumElems = VT.getVectorNumElements();
SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
: SV->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
MVT ShufSVT = ShufVT.getVectorElementType();
int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 16> ShuffleOps;
bool IsUnary;
if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt == SM_SentinelZero)
return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
: DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufSVT);
assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
// Actual nodes that may contain scalar elements
if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
return SDValue();
}
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
: DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
return SDValue();
}
/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (NumNonZero > 8 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
SDValue V;
bool First = true;
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41()) {
for (unsigned i = 0; i < 16; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
// If the build vector contains zeros or our first insertion is not the
// first index, then insert into a zero vector to break any register
// dependency; else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
First = false;
if (NumZero || 0 != i)
V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v16i8, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
return V;
}
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
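// Illustrative example: bytes (i-1, i) are merged little-endian into one i16
// as (zext(byte i) << 8) | zext(byte i-1), which is then inserted into word
// slot i/2 with PINSRW.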
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
// FIXME: Investigate extending to i32 instead of just i16.
// FIXME: Investigate combining the first 4 bytes as an i32 instead.
SDValue ThisElt, LastElt;
bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
if (LastIsNonZero) {
LastElt =
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
DAG.getConstant(8, dl, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt) {
if (1 == i) {
V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
: DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v8i16, V);
} else {
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i / 2, dl));
}
}
}
}
return DAG.getBitcast(MVT::v16i8, V);
}
/// Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (NumNonZero > 4 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
SDValue V;
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool IsNonZero = (NonZeros & (1 << i)) != 0;
if (IsNonZero) {
// If the build vector contains zeros or our first insertion is not the
// first index, then insert into a zero vector to break any register
// dependency; else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
First = false;
if (NumZero || 0 != i)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else {
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(MVT::v8i16, V);
continue;
}
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
return V;
}
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
SDValue Elt = Op->getOperand(i);
Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
}
assert(Zeroable.size() - Zeroable.count() > 1 &&
"We expect at least two non-zero elements!");
// We only know how to deal with build_vector nodes where elements are either
// zeroable or extract_vector_elt with constant index.
SDValue FirstNonZero;
unsigned FirstNonZeroIdx;
for (unsigned i=0; i < 4; ++i) {
if (Zeroable[i])
continue;
SDValue Elt = Op->getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Elt.getOperand(1)))
return SDValue();
// Make sure that this node is extracting from a 128-bit vector.
MVT VT = Elt.getOperand(0).getSimpleValueType();
if (!VT.is128BitVector())
return SDValue();
if (!FirstNonZero.getNode()) {
FirstNonZero = Elt;
FirstNonZeroIdx = i;
}
}
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
SDValue V1 = FirstNonZero.getOperand(0);
MVT VT = V1.getSimpleValueType();
// See if this build_vector can be lowered as a blend with zero.
SDValue Elt;
unsigned EltMaskIdx, EltIdx;
int Mask[4];
for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
if (Zeroable[EltIdx]) {
// The zero vector will be on the right hand side.
Mask[EltIdx] = EltIdx+4;
continue;
}
Elt = Op->getOperand(EltIdx);
// By construction, Elt is an EXTRACT_VECTOR_ELT with a constant index.
EltMaskIdx = Elt.getConstantOperandVal(1);
if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
break;
Mask[EltIdx] = EltIdx;
}
if (EltIdx == 4) {
// Let the shuffle legalizer deal with blend operations.
SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
if (V1.getSimpleValueType() != VT)
V1 = DAG.getBitcast(VT, V1);
return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
}
// See if we can lower this build_vector to an INSERTPS.
if (!Subtarget.hasSSE41())
return SDValue();
SDValue V2 = Elt.getOperand(0);
if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
V1 = SDValue();
bool CanFold = true;
for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
if (Zeroable[i])
continue;
SDValue Current = Op->getOperand(i);
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
V1 = SrcVector;
CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
}
if (!CanFold)
return SDValue();
assert(V1.getNode() && "Expected at least two non-zero elements!");
if (V1.getSimpleValueType() != MVT::v4f32)
V1 = DAG.getBitcast(MVT::v4f32, V1);
if (V2.getSimpleValueType() != MVT::v4f32)
V2 = DAG.getBitcast(MVT::v4f32, V2);
// Ok, we can emit an INSERTPS instruction.
unsigned ZMask = Zeroable.to_ulong();
unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask, DL));
return DAG.getBitcast(VT, Result);
}
/// Return a vector logical shift node.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
SelectionDAG &DAG, const TargetLowering &TLI,
const SDLoc &dl) {
assert(VT.is128BitVector() && "Unknown type for VShift");
MVT ShVT = MVT::v16i8;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load, and if the
// address is "base + cst", see if the cst can be "absorbed" into
// the shuffle mask.
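// Illustrative example: a 4-byte load from a 16-byte-aligned stack slot at
// offset 8 can become a v4i32 load of the whole slot followed by the splat
// shuffle <2, 2, 2, 2>.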
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
SDValue Ptr = LD->getBasePtr();
if (!ISD::isNormalLoad(LD) || LD->isVolatile())
return SDValue();
EVT PVT = LD->getValueType(0);
if (PVT != MVT::i32 && PVT != MVT::f32)
return SDValue();
int FI = -1;
int64_t Offset = 0;
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
} else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
Ptr = Ptr.getOperand(0);
} else {
return SDValue();
}
// FIXME: 256-bit vector instructions don't require a strict alignment,
// improve this code to support it better.
unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
// Make sure the stack object alignment is at least 16 or 32.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI.isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute the exact
// stack offset and reference FI + adjusted offset instead, if someone
// *really* cares about this; that would be the way to implement it.
return SDValue();
} else {
MFI.setObjectAlignment(FI, RequiredAlign);
}
}
// (Offset % 16 or 32) must be a multiple of 4. The address is then
// Ptr + (Offset & ~15).
if (Offset < 0)
return SDValue();
if ((Offset % RequiredAlign) & 3)
return SDValue();
int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
if (StartOffset) {
SDLoc DL(Ptr);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
}
int EltNo = (Offset - StartOffset) >> 2;
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset));
SmallVector<int, 8> Mask(NumElems, EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
}
return SDValue();
}
/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
/// elements can be replaced by a single large load which has the same value as
/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
///
/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool isAfterLegalize) {
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
SmallBitVector LoadMask(NumElems, false);
SmallBitVector ZeroMask(NumElems, false);
SmallBitVector UndefMask(NumElems, false);
// For each element in the initializer, see if we've found a load, zero or an
// undef.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = peekThroughBitcasts(Elts[i]);
if (!Elt.getNode())
return SDValue();
if (Elt.isUndef())
UndefMask[i] = true;
else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
ZeroMask[i] = true;
else if (ISD::isNON_EXTLoad(Elt.getNode())) {
LoadMask[i] = true;
LastLoadedElt = i;
// Each loaded element must be the correct fractional portion of the
// requested vector load.
if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
return SDValue();
} else
return SDValue();
}
assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks");
// Handle Special Cases - all undef or undef/zero.
if (UndefMask.count() == NumElems)
return DAG.getUNDEF(VT);
// FIXME: Should we return this as a BUILD_VECTOR instead?
if ((ZeroMask | UndefMask).count() == NumElems)
return VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
int FirstLoadedElt = LoadMask.find_first();
SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
EVT LDBaseVT = EltBase.getValueType();
// Consecutive loads can contain UNDEFs but not ZERO elements.
// Consecutive loads with UNDEF and ZERO elements require an
// additional shuffle stage to clear the ZERO elements.
bool IsConsecutiveLoad = true;
bool IsConsecutiveLoadWithZeros = true;
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
if (LoadMask[i]) {
SDValue Elt = peekThroughBitcasts(Elts[i]);
LoadSDNode *LD = cast<LoadSDNode>(Elt);
if (!DAG.areNonVolatileConsecutiveLoads(
LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
i - FirstLoadedElt)) {
IsConsecutiveLoad = false;
IsConsecutiveLoadWithZeros = false;
break;
}
} else if (ZeroMask[i]) {
IsConsecutiveLoad = false;
}
}
auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
"Cannot merge volatile loads.");
SDValue NewLd =
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
DAG.makeEquivalentMemoryOrdering(LDBase, NewLd);
return NewLd;
};
// LOAD - all consecutive load/undefs (must start/end with a load).
// If we have found an entire vector of loads and undefs, then return a large
// load of the entire vector width starting at the base pointer.
// If the vector contains zeros, then attempt to shuffle those elements.
if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
(IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
assert(LDBase && "Did not find base load for merging consecutive loads");
EVT EltVT = LDBase->getValueType(0);
// Ensure that the input vector size for the merged loads matches the
// cumulative size of the input elements.
if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
return SDValue();
if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
// Don't create 256-bit non-temporal aligned loads without AVX2 as these
// will lower to regular temporal loads and use the cache.
if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
VT.is256BitVector() && !Subtarget.hasInt256())
return SDValue();
if (IsConsecutiveLoad)
return CreateLoad(VT, LDBase);
// IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
// vector and a zero vector to clear out the zero elements.
if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
SmallVector<int, 4> ClearMask(NumElems, -1);
for (unsigned i = 0; i < NumElems; ++i) {
if (ZeroMask[i])
ClearMask[i] = i + NumElems;
else if (LoadMask[i])
ClearMask[i] = i;
}
SDValue V = CreateLoad(VT, LDBase);
SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
}
}
int LoadSize =
(1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
(LoadSize == 32 || LoadSize == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
: MVT::getIntegerVT(LoadSize);
MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
LDBase->getPointerInfo(),
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
DAG.makeEquivalentMemoryOrdering(LDBase, ResNode);
return DAG.getBitcast(VT, ResNode);
}
}
return SDValue();
}
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
unsigned NumElm = SplatBitSize / ScalarSize;
SmallVector<Constant *, 32> ConstantVec;
for (unsigned i = 0; i < NumElm; i++) {
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
if (ScalarSize == 32) {
Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
} else {
assert(ScalarSize == 64 && "Unsupported floating point scalar size");
Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
}
} else
Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
ConstantVec.push_back(Const);
}
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
static bool isUseOfShuffle(SDNode *N) {
for (auto *U : N->uses()) {
if (isTargetShuffle(U->getOpcode()))
return true;
if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
return isUseOfShuffle(U);
}
return false;
}
/// Attempt to use the vbroadcast instruction to generate a splat value
/// from a splat BUILD_VECTOR which uses:
/// a. A single scalar load, or a constant.
/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
///
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// VBROADCAST requires AVX.
// TODO: Splats could be generated for non-AVX CPUs using SSE
// instructions, but there's less potential gain for only 128-bit vectors.
if (!Subtarget.hasAVX())
return SDValue();
MVT VT = BVOp->getSimpleValueType(0);
SDLoc dl(BVOp);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
BitVector UndefElements;
SDValue Ld = BVOp->getSplatValue(&UndefElements);
// We need a splat of a single value to use broadcast, and it doesn't
// make any sense if the value is only in one element of the vector.
if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
APInt SplatValue, Undef;
unsigned SplatBitSize;
bool HasUndef;
// Check if this is a repeated constant pattern suitable for broadcasting.
if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
SplatBitSize > VT.getScalarSizeInBits() &&
SplatBitSize < VT.getSizeInBits()) {
// Avoid replacing with a broadcast when the build_vector is used by a
// shuffle instruction, to preserve the present custom lowering of shuffles.
if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
return SDValue();
// Replace the BUILD_VECTOR with a broadcast of the repeated constants.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LLVMContext *Ctx = DAG.getContext();
MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
if (Subtarget.hasAVX()) {
if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
!(SplatBitSize == 64 && Subtarget.is32Bit())) {
// The splatted value can fit in one INTEGER constant in the constant pool.
// Load the constant and broadcast it.
MVT CVT = MVT::getIntegerVT(SplatBitSize);
Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(
CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
MVT::getVectorVT(CVT, Repeat), Ld);
return DAG.getBitcast(VT, Brdcst);
} else if (SplatBitSize == 32 || SplatBitSize == 64) {
// The splatted value can fit in one FLOAT constant in the constant pool.
// Load the constant and broadcast it.
// AVX has support for 32- and 64-bit broadcasts of floats only.
// There is no 64-bit integer broadcast on a 32-bit subtarget.
MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
// Lower the splat via APFloat directly, to avoid any conversion.
Constant *C =
SplatBitSize == 32
? ConstantFP::get(*Ctx,
APFloat(APFloat::IEEEsingle(), SplatValue))
: ConstantFP::get(*Ctx,
APFloat(APFloat::IEEEdouble(), SplatValue));
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(
CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
MVT::getVectorVT(CVT, Repeat), Ld);
return DAG.getBitcast(VT, Brdcst);
} else if (SplatBitSize > 64) {
// Load the vector of constants and broadcast it.
MVT CVT = VT.getScalarType();
Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
*Ctx);
SDValue VCP = DAG.getConstantPool(VecC, PVT);
unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
Ld = DAG.getLoad(
MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
return DAG.getBitcast(VT, Brdcst);
}
}
}
return SDValue();
}
bool ConstSplatVal =
(Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
// Make sure that all of the users of a non-constant load are from the
// BUILD_VECTOR node.
if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
unsigned ScalarSize = Ld.getValueSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
// When optimizing for size, generate up to 5 extra bytes for a broadcast
// instruction to save 8 or more bytes of constant pool data.
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
// On Sandybridge (no AVX2), it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
// But override that restriction when optimizing for size.
// TODO: Check if splatting is recommended for other AVX-capable CPUs.
if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
// Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
// For size optimization, also splat v2f64 and v2i64, and for size opt
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
C = CF->getConstantFPValue();
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP =
DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(
CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
}
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
// Handle AVX2 in-register broadcasts.
if (!IsLoad && Subtarget.hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The scalar source must be a normal load.
if (!IsLoad)
return SDValue();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(Subtarget.hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit case, so that it
// doesn't match double, since there is no vbroadcastsd xmm.
if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
// Unsupported broadcast.
return SDValue();
}
/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
/// underlying vector and index.
///
/// Modifies \p ExtractedFromVec to the real vector and returns the real
/// index.
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
SDValue ExtIdx) {
int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
return Idx;
// For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
// lowered this:
// (extract_vector_elt (v8f32 %vreg1), Constant<6>)
// to:
// (extract_vector_elt (vector_shuffle<2,u,u,u>
// (extract_subvector (v8f32 %vreg0), Constant<4>),
// undef)
// Constant<0>)
// In this case the vector is the extract_subvector expression and the index
// is 2, as specified by the shuffle.
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
SDValue ShuffleVec = SVOp->getOperand(0);
MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
assert(ShuffleVecVT.getVectorElementType() ==
ExtractedFromVec.getSimpleValueType().getVectorElementType());
int ShuffleIdx = SVOp->getMaskElt(Idx);
if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
ExtractedFromVec = ShuffleVec;
return ShuffleIdx;
}
return Idx;
}
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
// Skip if insert_vec_elt is not supported.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
SDLoc DL(Op);
unsigned NumElems = Op.getNumOperands();
SDValue VecIn1;
SDValue VecIn2;
SmallVector<unsigned, 4> InsertIndices;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Opc = Op.getOperand(i).getOpcode();
if (Opc == ISD::UNDEF)
continue;
if (Opc != ISD::EXTRACT_VECTOR_ELT) {
// Quit if more than 1 element needs inserting.
if (InsertIndices.size() > 1)
return SDValue();
InsertIndices.push_back(i);
continue;
}
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
// Quit if extracted from vector of different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
if (!VecIn1.getNode())
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
if (!VecIn2.getNode())
VecIn2 = ExtractedFromVec;
else if (VecIn2 != ExtractedFromVec)
// Quit if more than 2 vectors to shuffle
return SDValue();
}
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)
Mask[i] = Idx + NumElems;
}
if (!VecIn1.getNode())
return SDValue();
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
for (unsigned Idx : InsertIndices)
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx, DL));
return NV;
}
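// Bit-pack a constant vXi1 build_vector into a single integer constant, with
// element idx becoming bit idx. Illustrative example: v8i1
// <1,0,1,1,u,0,0,1> packs to the i8 constant 0x8D (undef elements contribute
// a zero bit).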
static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector");
uint64_t Immediate = 0;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (!In.isUndef())
Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
}
SDLoc dl(Op);
MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
return DAG.getConstant(Immediate, dl, VT);
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode()))
return DAG.getTargetConstant(0, dl, VT);
if (ISD::isBuildVectorAllOnes(Op.getNode()))
return DAG.getTargetConstant(1, dl, VT);
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, Imm);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
// Vector has one or more non-const elements
uint64_t Immediate = 0;
SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
bool HasConstElts = false;
int SplatIdx = -1;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.isUndef())
continue;
if (!isa<ConstantSDNode>(In))
NonConstIdx.push_back(idx);
else {
Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
HasConstElts = true;
}
if (SplatIdx < 0)
SplatIdx = idx;
else if (In != Op.getOperand(SplatIdx))
IsSplat = false;
}
// For a splat, use (select i1 splat_elt, all-ones, all-zeroes).
if (IsSplat)
return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
DAG.getConstant(1, dl, VT),
DAG.getConstant(0, dl, VT));
// insert elements one by one
SDValue DstVec;
SDValue Imm;
if (Immediate) {
MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
Imm = DAG.getConstant(Immediate, dl, ImmVT);
}
else if (HasConstElts)
Imm = DAG.getConstant(0, dl, VT);
else
Imm = DAG.getUNDEF(VT);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
DstVec = DAG.getBitcast(VT, Imm);
else {
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
unsigned InsertIdx = NonConstIdx[i];
DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(InsertIdx),
DAG.getIntPtrConstant(InsertIdx, dl));
}
return DstVec;
}
/// \brief Return true if \p N implements a horizontal binop and return the
/// operands for the horizontal binop into V0 and V1.
///
/// This is a helper function of LowerToHorizontalOp().
/// This function checks that the build_vector \p N in input implements a
/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
/// operation to match.
/// For example, if \p Opcode is equal to ISD::ADD, then this function
/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
/// is equal to ISD::SUB, then this function checks if this is a horizontal
/// arithmetic sub.
///
/// This function only analyzes elements of \p N whose indices are
/// in range [BaseIdx, LastIdx).
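/// Illustrative example: for a v4i32 build_vector and Opcode == ISD::ADD,
/// the sequence ((add A[0],A[1]), (add A[2],A[3]), (add B[0],B[1]),
/// (add B[2],B[3])) matches, returning V0 = A and V1 = B (the HADD pattern).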
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
SelectionDAG &DAG,
unsigned BaseIdx, unsigned LastIdx,
SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
unsigned ExpectedVExtractIdx = BaseIdx;
unsigned NumElts = LastIdx - BaseIdx;
V0 = DAG.getUNDEF(VT);
V1 = DAG.getUNDEF(VT);
// Check if N implements a horizontal binop.
for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
SDValue Op = N->getOperand(i + BaseIdx);
// Skip UNDEFs.
if (Op->isUndef()) {
// Update the expected vector extract index.
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
ExpectedVExtractIdx += 2;
continue;
}
CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
if (!CanFold)
break;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0) == Op1.getOperand(0) &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
isa<ConstantSDNode>(Op1.getOperand(1)));
if (!CanFold)
break;
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
if (i * 2 < NumElts) {
if (V0.isUndef()) {
V0 = Op0.getOperand(0);
if (V0.getValueType() != VT)
return false;
}
} else {
if (V1.isUndef()) {
V1 = Op0.getOperand(0);
if (V1.getValueType() != VT)
return false;
}
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
}
SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
if (I0 == ExpectedVExtractIdx)
CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
else if (IsCommutable && I1 == ExpectedVExtractIdx) {
// Try to match the following dag sequence:
// (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
} else
CanFold = false;
ExpectedVExtractIdx += 2;
}
return CanFold;
}
/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
/// a concat_vector.
///
/// This is a helper function of LowerToHorizontalOp().
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
/// horizontal binary operations.
///
/// The kind of horizontal binary operation is defined by \p X86Opcode.
///
/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
/// the two new horizontal binops.
/// When Mode is set, the first horizontal binop dag node takes as input the
/// lower 128 bits of V0 and the upper 128 bits of V0. The second horizontal
/// binop dag node takes as input the lower 128 bits of V1 and the upper
/// 128 bits of V1.
/// Example:
/// HADD V0_LO, V0_HI
/// HADD V1_LO, V1_HI
///
/// Otherwise, the first horizontal binop dag node takes as input the lower
/// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal
/// binop dag node takes the upper 128 bits of V0 and the upper 128 bits of V1.
/// Example:
/// HADD V0_LO, V1_LO
/// HADD V0_HI, V1_HI
///
/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
/// 128 bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
/// the upper 128 bits of the result.
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
const SDLoc &DL, SelectionDAG &DAG,
unsigned X86Opcode, bool Mode,
bool isUndefLO, bool isUndefHI) {
MVT VT = V0.getSimpleValueType();
assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&
"Invalid nodes in input!");
unsigned NumElts = VT.getVectorNumElements();
SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
MVT NewVT = V0_LO.getSimpleValueType();
SDValue LO = DAG.getUNDEF(NewVT);
SDValue HI = DAG.getUNDEF(NewVT);
if (Mode) {
// Don't emit a horizontal binop if the result is expected to be UNDEF.
if (!isUndefLO && !V0->isUndef())
LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
if (!isUndefHI && !V1->isUndef())
HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
} else {
// Don't emit a horizontal binop if the result is expected to be UNDEF.
if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
/// Returns true iff \p BV builds a vector with the result equivalent to
/// the result of ADDSUB operation.
/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
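/// Illustrative example: for v4f32, the build_vector
/// ((fsub A[0],B[0]), (fadd A[1],B[1]), (fsub A[2],B[2]), (fadd A[3],B[3]))
/// matches, with Opnd0 = A and Opnd1 = B.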
static bool isAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1) {
MVT VT = BV->getSimpleValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
return false;
unsigned NumElts = VT.getVectorNumElements();
SDValue InVec0 = DAG.getUNDEF(VT);
SDValue InVec1 = DAG.getUNDEF(VT);
// Odd-numbered elements in the input build vector are obtained from
// adding two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
// subtracting two integer/float elements.
unsigned ExpectedOpcode = ISD::FSUB;
unsigned NextExpectedOpcode = ISD::FADD;
bool AddFound = false;
bool SubFound = false;
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Op = BV->getOperand(i);
// Skip 'undef' values.
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::UNDEF) {
std::swap(ExpectedOpcode, NextExpectedOpcode);
continue;
}
// Early exit if we found an unexpected opcode.
if (Opcode != ExpectedOpcode)
return false;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Try to match the following pattern:
// (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
// Early exit if we cannot match that sequence.
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
!isa<ConstantSDNode>(Op1.getOperand(1)) ||
Op0.getOperand(1) != Op1.getOperand(1))
return false;
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
if (I0 != i)
return false;
// We found a valid add/sub node. Update the information accordingly.
if (i & 1)
AddFound = true;
else
SubFound = true;
// Update InVec0 and InVec1.
if (InVec0.isUndef()) {
InVec0 = Op0.getOperand(0);
if (InVec0.getSimpleValueType() != VT)
return false;
}
if (InVec1.isUndef()) {
InVec1 = Op1.getOperand(0);
if (InVec1.getSimpleValueType() != VT)
return false;
}
// Make sure that the operands in input to each add/sub node always
// come from the same pair of vectors.
if (InVec0 != Op0.getOperand(0)) {
if (ExpectedOpcode == ISD::FSUB)
return false;
// FADD is commutable. Try to commute the operands
// and then test again.
std::swap(Op0, Op1);
if (InVec0 != Op0.getOperand(0))
return false;
}
if (InVec1 != Op1.getOperand(0))
return false;
// Update the pair of expected opcodes.
std::swap(ExpectedOpcode, NextExpectedOpcode);
}
// Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
return false;
Opnd0 = InVec0;
Opnd1 = InVec1;
return true;
}
/// Returns true if it is possible to fold MUL and an idiom that has already
/// been recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
/// If (and only if) true is returned, the operands of FMADDSUB are written to
/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
/// before replacement of such SDNode with ADDSUB operation. Thus the number
/// of \p Opnd0 uses is expected to be equal to 2.
/// For example, this function may be called for the following IR:
/// %AB = fmul fast <2 x double> %A, %B
/// %Sub = fsub fast <2 x double> %AB, %C
/// %Add = fadd fast <2 x double> %AB, %C
/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
/// <2 x i32> <i32 0, i32 3>
/// There is a def for %Addsub here, which potentially can be replaced by
/// X86ISD::ADDSUB operation:
/// %Addsub = X86ISD::ADDSUB %AB, %C
/// and such ADDSUB can further be replaced with FMADDSUB:
/// %Addsub = FMADDSUB %A, %B, %C.
///
/// The main reason why this method is called before the replacement of the
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
!Subtarget.hasAnyFMA())
return false;
// FIXME: These checks must match the similar ones in
// DAGCombiner::visitFADDForFMACombine. It would be good to have one
// function that would answer if it is Ok to fuse MUL + ADD to FMADD
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
if (!AllowFusion)
return false;
Opnd2 = Opnd1;
Opnd1 = Opnd0.getOperand(1);
Opnd0 = Opnd0.getOperand(0);
return true;
}
/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub'
/// operation into an X86ISD::ADDSUB or X86ISD::FMADDSUB node accordingly.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
SDLoc DL(BV);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
// 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
// recognition.
if (VT.is512BitVector())
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = BV->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
unsigned Half = NumElts/2;
// Count the number of UNDEF operands in the build_vector in input.
for (unsigned i = 0, e = Half; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsLO++;
for (unsigned i = Half, e = NumElts; i != e; ++i)
if (BV->getOperand(i)->isUndef())
NumUndefsHI++;
// Early exit if this is either a build_vector of all UNDEFs, or if all the
// operands but one are UNDEF.
if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
return SDValue();
SDLoc DL(BV);
SDValue InVec0, InVec1;
if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
// Try to match an SSE3 float HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
// Try to match an SSSE3 integer HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
}
if (!Subtarget.hasAVX())
return SDValue();
if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
// Try to match an AVX horizontal add/sub of packed single/double
// precision floating point values from 256-bit vectors.
SDValue InVec2, InVec3;
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
// Try to match an AVX2 horizontal add/sub of signed integers.
SDValue InVec2, InVec3;
unsigned X86Opcode;
bool CanFold = true;
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
X86Opcode = X86ISD::HSUB;
else
CanFold = false;
if (CanFold) {
// Fold this build_vector into a single horizontal add/sub.
// Do this only if the target has AVX2.
if (Subtarget.hasAVX2())
return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
// Convert this build_vector into a pair of horizontal binops followed by
// a concat vector.
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
isUndefLO, isUndefHI);
}
}
if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
VT == MVT::v16i16) && Subtarget.hasAVX()) {
unsigned X86Opcode;
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HADD;
else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::HSUB;
else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::FHADD;
else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
X86Opcode = X86ISD::FHSUB;
else
return SDValue();
// Don't try to expand this build_vector into a pair of horizontal add/sub
// if we can simply emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
return SDValue();
// Convert this build_vector into two horizontal add/subs followed by
// a concat vector.
bool isUndefLO = NumUndefsLO == Half;
bool isUndefHI = NumUndefsHI == Half;
return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
isUndefLO, isUndefHI);
}
return SDValue();
}
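// Worked example for the horizontal matching above (illustrative, v4f32
// with SSE3): the build_vector
//   (fadd a0, a1), (fadd a2, a3), (fadd b0, b1), (fadd b2, b3)
// with a = <a0, a1, a2, a3> and b = <b0, b1, b2, b3> is recognized by
// isHorizontalBinOp over the whole width and lowered to a single
// (X86ISD::FHADD a, b), i.e. one HADDPS instruction.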
/// If a BUILD_VECTOR's source elements all apply the same bit operation and
/// one of their operands is constant, lower to a pair of BUILD_VECTORs and
/// just apply the bit operation to the two vectors.
/// NOTE: It's not in our interest to start making a general purpose
/// vectorizer from this, but enough scalar bit operations are created by the
/// later legalization + scalarization stages to need basic support.
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Check that all elements have the same opcode.
// TODO: Should we allow UNDEFS and if so how many?
unsigned Opcode = Op->getOperand(0).getOpcode();
for (unsigned i = 1; i < NumElems; ++i)
if (Opcode != Op->getOperand(i).getOpcode())
return SDValue();
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
switch (Opcode) {
default:
return SDValue();
case ISD::AND:
case ISD::XOR:
case ISD::OR:
if (!TLI.isOperationLegalOrPromote(Opcode, VT))
return SDValue();
break;
}
SmallVector<SDValue, 4> LHSElts, RHSElts;
for (SDValue Elt : Op->ops()) {
SDValue LHS = Elt.getOperand(0);
SDValue RHS = Elt.getOperand(1);
// We expect the canonicalized RHS operand to be the constant.
if (!isa<ConstantSDNode>(RHS))
return SDValue();
LHSElts.push_back(LHS);
RHSElts.push_back(RHS);
}
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
}
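// Worked example for the bit-op lowering above (illustrative): the v4i32
// build_vector
//   (and a, 1), (and b, 2), (and c, 4), (and d, 8)
// becomes
//   (and (build_vector a, b, c, d), (build_vector 1, 2, 4, 8))
// replacing four scalar ANDs with a single vector AND.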
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
/// functionality to do this, so it's all zeros, all ones, or some derivation
/// that is cheap to calculate.
static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
// Vectors containing all zeros can be matched by pxor and xorps.
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getZeroVector(VT, Subtarget, DAG, DL);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
(VT == MVT::v8i32 && Subtarget.hasInt256()))
return Op;
return getOnesVector(VT, DAG, DL);
}
return SDValue();
}
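// For example (illustrative): an all-ones v4i32 build_vector is returned
// as-is here and matches a single PCMPEQD of a register with itself, which
// is cheaper than loading <-1, -1, -1, -1> from the constant pool.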
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Lower predicate (vXi1) build_vectors with dedicated handling.
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
return AddSub;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
return HorizontalOp;
if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
return Broadcast;
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
return BitOp;
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
uint64_t NonZeros = 0;
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.isUndef())
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
if (X86::isZeroNode(Elt))
NumZero++;
else {
assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
NonZeros |= ((uint64_t)1 << i);
NumNonZero++;
}
}
// All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NumNonZero == 0)
return DAG.getUNDEF(VT);
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
unsigned Idx = countTrailingZeros(NonZeros);
SDValue Item = Op.getOperand(Idx);
// If this is an insertion of an i64 value on x86-32, and if the top bits of
// the value are obviously zero, truncate the value to i32 and do the
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
MVT VecVT = MVT::v4i32;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef(
Item, Idx * 2, true, Subtarget, DAG));
}
}
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is.
if (Idx == 0) {
if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget.is64Bit())) {
assert((VT.is128BitVector() || VT.is256BitVector() ||
VT.is512BitVector()) &&
"Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
// We can't directly insert an i8 or i16 into a vector, so zero extend
// it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
if (VT.getSizeInBits() >= 256) {
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
if (Subtarget.hasAVX()) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
} else {
// Without AVX, we need to extend to a 128-bit vector and then
// insert into the 256-bit vector.
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl);
Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl);
}
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getBitcast(VT, Item);
}
}
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
X86::isZeroNode(Op.getOperand(0)) &&
!X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
VT, Op.getOperand(1)),
NumBits/2, DAG, *this, dl);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
return SDValue();
// Otherwise, if this is a vector with i32 or f32 elements, and the element
// is a non-constant being inserted into an element other than the low one,
// we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
// movd/movss) to move this into the low element, then shuffle it into
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1) {
if (EVTBits == 32) {
// Instead of a shuffle like this:
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// check if it's possible to issue this instead:
// shuffle (vload ptr), undef, <1, 1, 1, 1>
unsigned Idx = countTrailingZeros(NonZeros);
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
}
return SDValue();
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
return SDValue();
// See if we can use a vector load to get all of the elements.
if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
if (SDValue LD =
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
return LD;
}
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
if (VT.is256BitVector() || VT.is512BitVector()) {
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// Build both the lower and upper subvector.
SDValue Lower =
DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElems / 2));
SDValue Upper = DAG.getBuildVector(
HVT, dl, makeArrayRef(&Ops[NumElems / 2], NumElems / 2));
// Recreate the wider vector with the lower and upper part.
if (VT.is256BitVector())
return concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
return concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = countTrailingZeros(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
DAG, Subtarget))
return V;
if (EVTBits == 16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
DAG, Subtarget))
return V;
// If element VT is == 32 bits and has 4 elements, try to generate an INSERTPS.
if (EVTBits == 32 && NumElems == 4)
if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
return V;
// If element VT is == 32 bits, turn it into a number of shuffles.
if (NumElems == 4 && NumZero > 0) {
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1ULL << i));
if (isZero)
Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
Ops[i] = Ops[i*2]; // Must be a zero vector.
break;
case 1:
Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
break;
case 2:
Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
break;
case 3:
Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
break;
}
}
bool Reverse1 = (NonZeros & 0x3) == 2;
bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
int MaskVec[] = {
Reverse1 ? 1 : 0,
Reverse1 ? 0 : 1,
static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
static_cast<int>(Reverse2 ? NumElems : NumElems+1)
};
return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
}
if (Values.size() > 1 && VT.is128BitVector()) {
// Check for a build_vector that is mostly a shuffle plus a few insertions.
if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
return Sh;
// For SSE 4.1, use insertps to insert the remaining elements one at a time.
if (Subtarget.hasSSE41()) {
SDValue Result;
if (!Op.getOperand(0).isUndef())
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).isUndef()) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
return Result;
}
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
if (!Op.getOperand(i).isUndef())
Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
else
Ops[i] = DAG.getUNDEF(VT);
}
// Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0>
// : unpcklps 2, 3 ==> Y: <?, ?, 3, 2>
// Step 2: unpcklpd X, Y ==> <3, 2, 1, 0>
for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
// Generate scaled UNPCKL shuffle mask.
SmallVector<int, 16> Mask;
for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(i);
for (unsigned i = 0; i != Scale; ++i)
Mask.push_back(NumElems+i);
Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
}
return Ops[0];
}
return SDValue();
}
// 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
assert((ResVT.is256BitVector() ||
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
if (ResVT.is256BitVector())
return concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
SDValue V3 = Op.getOperand(2);
SDValue V4 = Op.getOperand(3);
return concat256BitVectors(
concat128BitVectors(V1, V2, HalfVT, NumElems / 2, DAG, dl),
concat128BitVectors(V3, V4, HalfVT, NumElems / 2, DAG, dl), ResVT,
NumElems, DAG, dl);
}
return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
// Return true if all the operands of the given CONCAT_VECTORS node are zeros
// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
static bool isExpandWithZeros(const SDValue &Op) {
assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
"Expand with zeros only possible in CONCAT_VECTORS nodes!");
for (unsigned i = 1; i < Op.getNumOperands(); i++)
if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
return false;
return true;
}
// Returns the promoted node if the given node is a type promotion (by
// concatenating i1 zeros) of the result of a node that already zeroes all
// upper bits of a k-register, and SDValue() otherwise.
static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
unsigned Opc = Op.getOpcode();
assert(Opc == ISD::CONCAT_VECTORS &&
Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected node to check for type promotion!");
// As long as we are concatenating zeros to the upper part of a previous node
// result, climb up the tree until a node with a different opcode is
// encountered.
while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
if (Opc == ISD::INSERT_SUBVECTOR) {
if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
Op.getConstantOperandVal(2) == 0)
Op = Op.getOperand(1);
else
return SDValue();
} else { // Opc == ISD::CONCAT_VECTORS
if (isExpandWithZeros(Op))
Op = Op.getOperand(0);
else
return SDValue();
}
Opc = Op.getOpcode();
}
// Check if the first inserted node zeroes the upper bits, or an 'and' result
// of a node that zeros the upper bits (its masked version).
if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
(Op.getOpcode() == ISD::AND &&
(isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
return Op;
}
return SDValue();
}
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG & DAG) {
SDLoc dl(Op);
MVT ResVT = Op.getSimpleValueType();
unsigned NumOfOperands = Op.getNumOperands();
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
// If this node promotes - by concatenating zeroes - the type of the result
// of a node with an instruction that zeroes all upper (irrelevant) bits of
// the output register, mark it as legal and catch the pattern in instruction
// selection to avoid emitting extra instructions (for zeroing upper bits).
if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) {
SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64);
SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted,
ZeroC);
}
SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
// Specialize the cases when all, or all but one, of the operands are undef.
unsigned NumOfDefinedOps = 0;
unsigned OpIdx = 0;
for (unsigned i = 0; i < NumOfOperands; i++)
if (!Op.getOperand(i).isUndef()) {
NumOfDefinedOps++;
OpIdx = i;
}
if (NumOfDefinedOps == 0)
return Undef;
if (NumOfDefinedOps == 1) {
unsigned SubVecNumElts =
Op.getOperand(OpIdx).getValueType().getVectorNumElements();
SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef,
Op.getOperand(OpIdx), IdxVal);
}
MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
SmallVector<SDValue, 2> Ops;
for (unsigned i = 0; i < NumOfOperands/2; i++)
Ops.push_back(Op.getOperand(i));
SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
Ops.clear();
for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++)
Ops.push_back(Op.getOperand(i));
SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
// 2 operands
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
assert(V1.getValueType() == V2.getValueType() &&
V1.getValueType().getVectorNumElements() == NumElems/2 &&
"Unexpected operands in CONCAT_VECTORS");
if (ResVT.getSizeInBits() >= 16)
return Op; // The operation is legal with KUNPCK
bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl);
if (IsZeroV1 && IsZeroV2)
return ZeroVec;
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
if (V2.isUndef())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
if (IsZeroV2)
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx);
SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
if (V1.isUndef())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
if (IsZeroV1)
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal);
}
static SDValue LowerCONCAT_VECTORS(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
(VT.is512BitVector() && (Op.getNumOperands() == 2 ||
Op.getNumOperands() == 4)));
// AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
//===----------------------------------------------------------------------===//
// Vector shuffle lowering
//
// This is an experimental code path for lowering vector shuffles on x86. It is
// designed to handle arbitrary vector shuffles and blends, gracefully
// degrading performance as necessary. It works hard to recognize idiomatic
// shuffles and lower them to optimal instruction patterns without leaving
// a framework that allows reasonably efficient handling of all vector shuffle
// patterns.
//===----------------------------------------------------------------------===//
/// \brief Tiny helper function to identify a no-op mask.
///
/// This is a somewhat boring predicate function. It checks whether the mask
/// array input, which is assumed to be a single-input shuffle mask of the kind
/// used by the X86 shuffle instructions (not a fully general
/// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an
/// in-place shuffle are 'no-op's.
static bool isNoopShuffleMask(ArrayRef<int> Mask) {
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] >= 0 && Mask[i] != i)
return false;
}
return true;
}
/// \brief Test whether there are elements crossing 128-bit lanes in this
/// shuffle mask.
///
/// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
/// and we routinely test for these.
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
int LaneSize = 128 / VT.getScalarSizeInBits();
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
return true;
return false;
}
/// \brief Test whether a shuffle mask is equivalent within each sub-lane.
///
/// This checks a shuffle mask to see if it is performing the same
/// lane-relative shuffle in each sub-lane. This trivially implies
/// that it is also not lane-crossing. It may however involve a blend from the
/// same lane of a second vector.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0);
if (Mask[i] < 0)
continue;
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
// This entry crosses lanes, so there is no way to model this shuffle.
return false;
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
int LocalM = Mask[i] < Size ? Mask[i] % LaneSize
: Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] < 0)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
else if (RepeatedMask[i % LaneSize] != LocalM)
// Found a mismatch with the repeated mask.
return false;
}
return true;
}
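// Worked example (illustrative, v8f32, 128-bit lanes): the shuffle mask
//   <0, 8, 1, 9, 4, 12, 5, 13>
// performs the same lane-relative shuffle in both lanes, so RepeatedMask
// becomes <0, 4, 1, 5> (second-vector indices remapped into [4, 8)), the
// familiar UNPCKLPS pattern.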
/// Test whether a shuffle mask is equivalent within each 128-bit lane.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
/// Test whether a shuffle mask is equivalent within each 256-bit lane.
static bool
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}
/// Test whether a target shuffle mask is equivalent within each sub-lane.
/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, SM_SentinelUndef);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0));
if (Mask[i] == SM_SentinelUndef)
continue;
if (Mask[i] == SM_SentinelZero) {
if (!isUndefOrZero(RepeatedMask[i % LaneSize]))
return false;
RepeatedMask[i % LaneSize] = SM_SentinelZero;
continue;
}
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
// This entry crosses lanes, so there is no way to model this shuffle.
return false;
// Ok, handle the in-lane shuffles by detecting if and when they repeat.
// Adjust second vector indices to start at LaneSize instead of Size.
int LocalM =
Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
if (RepeatedMask[i % LaneSize] == SM_SentinelUndef)
// This is the first non-undef entry in this slot of a 128-bit lane.
RepeatedMask[i % LaneSize] = LocalM;
else if (RepeatedMask[i % LaneSize] != LocalM)
// Found a mismatch with the repeated mask.
return false;
}
return true;
}
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
/// arguments.
///
/// This is a fast way to test a shuffle mask against a fixed pattern:
///
/// if (isShuffleEquivalent(V1, V2, Mask, {3, 2, 1, 0})) { ... }
///
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask) {
if (Mask.size() != ExpectedMask.size())
return false;
int Size = Mask.size();
// If the values are build vectors, we can look through them to find
// equivalent inputs that make the shuffles equivalent.
auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
for (int i = 0; i < Size; ++i) {
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] >= 0 && Mask[i] != ExpectedMask[i]) {
auto *MaskBV = Mask[i] < Size ? BV1 : BV2;
auto *ExpectedBV = ExpectedMask[i] < Size ? BV1 : BV2;
if (!MaskBV || !ExpectedBV ||
MaskBV->getOperand(Mask[i] % Size) !=
ExpectedBV->getOperand(ExpectedMask[i] % Size))
return false;
}
}
return true;
}
/// Checks whether a target shuffle mask is equivalent to an explicit pattern.
///
/// The masks must be exactly the same width.
///
/// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
/// value in ExpectedMask is always accepted. Otherwise the indices must match.
///
/// SM_SentinelZero is accepted as a valid negative index but must match in
/// both masks.
static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
ArrayRef<int> ExpectedMask) {
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
for (int i = 0; i < Size; ++i)
if (Mask[i] == SM_SentinelUndef)
continue;
else if (Mask[i] < 0 && Mask[i] != SM_SentinelZero)
return false;
else if (Mask[i] != ExpectedMask[i])
return false;
return true;
}
// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
// mask.
static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
const APInt &Zeroable) {
int NumElts = Mask.size();
assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
}
return TargetMask;
}
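// For example (illustrative): Mask = <0, 5, 2, 7> with Zeroable = 0b1010
// (elements 1 and 3 zeroable) yields the target mask
//   <0, SM_SentinelZero, 2, SM_SentinelZero>.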
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
// instructions.
static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
if (VT != MVT::v8i32 && VT != MVT::v8f32)
return false;
SmallVector<int, 8> Unpcklwd;
createUnpackShuffleMask(MVT::v8i16, Unpcklwd, /* Lo = */ true,
/* Unary = */ false);
SmallVector<int, 8> Unpckhwd;
createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
/* Unary = */ false);
bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) ||
isTargetShuffleEquivalent(Mask, Unpckhwd));
return IsUnpackwdMask;
}
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for
/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
/// example.
///
/// NB: We rely heavily on "undef" masks preserving the input lane.
static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
unsigned Imm = 0;
Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0;
Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2;
Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4;
Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6;
return Imm;
}
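// Worked example (illustrative): the mask <2, 1, 0, 3> encodes as
//   Imm = 2 | (1 << 2) | (0 << 4) | (3 << 6) = 0xC6,
// the same immediate PSHUFD/SHUFPS would use for that permutation. Undef
// elements default to their own lane index, preserving the input lane.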
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
/// \brief Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2) {
APInt Zeroable(Mask.size(), 0);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Mask.size();
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
Zeroable.setBit(i);
continue;
}
// Determine shuffle input and normalize the mask.
SDValue V = M < Size ? V1 : V2;
M %= Size;
// Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
if (V.getOpcode() != ISD::BUILD_VECTOR)
continue;
// If the BUILD_VECTOR has fewer elements, then the bitcasted portion of
// the (larger) source element must be UNDEF/ZERO.
if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef() || X86::isZeroNode(Op))
Zeroable.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
if (Val == 0)
Zeroable.setBit(i);
}
continue;
}
// If the BUILD_VECTOR has more elements, then all the (smaller) source
// elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size;
bool AllZeroable = true;
for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j);
AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
}
if (AllZeroable)
Zeroable.setBit(i);
continue;
}
}
return Zeroable;
}
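// Worked example for the zeroable computation above (illustrative, v4i32):
// with V2 a build_vector of zeros and Mask = <0, 4, -1, 7>, elements 1 and
// 3 reference the all-zeros V2 and element 2 is undef, so Zeroable = 0b1110
// and only element 0 still needs a real shuffle source.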
// The shuffle result is as follows:
// 0*a[0] 0*a[1] ... 0*a[n], n >= 0, where the a[] elements appear in
// ascending order.
// Each Zeroable element corresponds to a particular Mask element, as
// described in the computeZeroableShuffleElements function.
//
// This function looks for a sub-mask whose nonzero elements are in
// increasing order; if such a sub-mask exists, it returns true.
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
ArrayRef<int> Mask, const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
for (int i = 0, e = Mask.size(); i < e; i++) {
// The mask must be fully defined; bail out on any undef element.
assert(Mask[i] >= -1 && "Out of bound mask element!");
if (Mask[i] < 0)
return false;
if (Zeroable[i])
continue;
// Find the lowest nonzero element.
if (NextElement < 0) {
NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
IsZeroSideLeft = NextElement != 0;
}
// Exit if the mask's nonzero elements are not in increasing order.
if (NextElement != Mask[i])
return false;
NextElement++;
}
return true;
}
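// For example (illustrative, v4i32): with Zeroable = 0b0101, the nonzero
// positions are 1 and 3; a mask whose entries there are 0 and 1 (the low
// elements of V1 in increasing order) matches, with IsZeroSideLeft = false.
// This is exactly the element placement a masked VEXPAND produces.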
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
const int NumBytes = VT.getSizeInBits() / 8;
const int NumEltBytes = VT.getScalarSizeInBits() / 8;
assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
(Subtarget.hasAVX2() && VT.is256BitVector()) ||
(Subtarget.hasBWI() && VT.is512BitVector()));
SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
// Sign bit set in i8 mask means zero element.
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
SDValue V;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / NumEltBytes];
if (M < 0) {
PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
continue;
}
if (Zeroable[i / NumEltBytes]) {
PSHUFBMask[i] = ZeroMask;
continue;
}
// We can only use a single input of V1 or V2.
SDValue SrcV = (M >= Size ? V2 : V1);
if (V && V != SrcV)
return SDValue();
V = SrcV;
M %= Size;
// PSHUFB can't cross lanes, ensure this doesn't happen.
if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
return SDValue();
M = M % LaneSize;
M = M * NumEltBytes + (i % NumEltBytes);
PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
}
assert(V && "Failed to find a source input");
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
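// Worked example (illustrative): the single-input v4i32 mask <1, 0, 3, 2>
// has 4-byte elements, so the PSHUFB byte mask becomes
//   <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11>
// swapping adjacent dwords without crossing a 128-bit lane.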
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl);
// X86 has a dedicated shuffle pattern that can be lowered to VEXPAND.
static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
const APInt &Zeroable,
ArrayRef<int> Mask, SDValue &V1,
SDValue &V2, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
bool IsLeftZeroSide = true;
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
IsLeftZeroSide))
return SDValue();
unsigned VEXPANDMask = (~Zeroable).getZExtValue();
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
unsigned NumElts = VT.getVectorNumElements();
assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements");
SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
Subtarget, DAG, DL);
SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
return DAG.getSelect(DL, VT, VMask,
DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
ZeroVector);
}
static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
unsigned &UnpackOpcode, bool IsUnary,
ArrayRef<int> TargetMask, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
for (int i = 0; i != NumElts; i += 2) {
int M1 = TargetMask[i + 0];
int M2 = TargetMask[i + 1];
Undef1 &= (SM_SentinelUndef == M1);
Undef2 &= (SM_SentinelUndef == M2);
Zero1 &= isUndefOrZero(M1);
Zero2 &= isUndefOrZero(M2);
}
assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
"Zeroable shuffle detected");
// Attempt to match the target mask against the unpack lo/hi mask patterns.
SmallVector<int, 64> Unpckl, Unpckh;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
UnpackOpcode = X86ISD::UNPCKL;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
return true;
}
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
UnpackOpcode = X86ISD::UNPCKH;
V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
return true;
}
// If a unary shuffle, attempt to match as an unpack lo/hi with zero.
if (IsUnary && (Zero1 || Zero2)) {
// Don't bother if we can blend instead.
if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
return false;
bool MatchLo = true, MatchHi = true;
for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
int M = TargetMask[i];
// Ignore if the input is known to be zero or the index is undef.
if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
(M == SM_SentinelUndef))
continue;
MatchLo &= (M == Unpckl[i]);
MatchHi &= (M == Unpckh[i]);
}
if (MatchLo || MatchHi) {
UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
return true;
}
}
// If a binary shuffle, commute and try again.
if (!IsUnary) {
ShuffleVectorSDNode::commuteMask(Unpckl);
if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
UnpackOpcode = X86ISD::UNPCKL;
std::swap(V1, V2);
return true;
}
ShuffleVectorSDNode::commuteMask(Unpckh);
if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
UnpackOpcode = X86ISD::UNPCKH;
std::swap(V1, V2);
return true;
}
}
return false;
}
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SmallVector<int, 8> Unpckl;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
SmallVector<int, 8> Unpckh;
createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
// Commute and try again.
ShuffleVectorSDNode::commuteMask(Unpckl);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
ShuffleVectorSDNode::commuteMask(Unpckh);
if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
return SDValue();
}
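// For example (illustrative, v4i32): createUnpackShuffleMask produces
// <0, 4, 1, 5> for the lo unpack and <2, 6, 3, 7> for the hi unpack, so a
// matching shuffle becomes a single UNPCKL/UNPCKH, and the commuted masks
// (e.g. <4, 0, 5, 1>) map to the same nodes with the operands swapped.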
/// \brief Try to emit a bitmask instruction for a shuffle.
///
/// This handles cases where we can model a blend exactly as a bitmask due to
/// one of the inputs being zeroable.
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() && "Floating point types are not supported");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Zeroable[i])
continue;
if (Mask[i] % Size != i)
return SDValue(); // Not a blend.
if (!V)
V = Mask[i] < Size ? V1 : V2;
else if (V != (Mask[i] < Size ? V1 : V2))
return SDValue(); // Can only let one input through the mask.
VMaskOps[i] = AllOnes;
}
if (!V)
return SDValue(); // No non-zeroable elements!
SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
return DAG.getNode(ISD::AND, DL, VT, V, VMask);
}
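// Worked example (illustrative, v4i32 with V2 zeroable in lanes 1 and 3):
// Mask = <0, 5, 2, 7> keeps every element in its own lane, so the shuffle
// lowers to (and V1, <-1, 0, -1, 0>), a single PAND with a constant mask.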
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
/// unavailable. Currently it is only suitable for integer vectors, but could
/// be generalized for floating point vectors if desirable.
static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> MaskOps;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size)
return SDValue(); // Shuffled input!
MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero);
}
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
// We have to cast V2 around.
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
DAG.getBitcast(MaskVT, V1Mask),
DAG.getBitcast(MaskVT, V2)));
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG);
static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
MutableArrayRef<int> TargetMask,
bool &ForceV1Zero, bool &ForceV2Zero,
uint64_t &BlendMask) {
bool V1IsZeroOrUndef =
V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZeroOrUndef =
V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
BlendMask = 0;
ForceV1Zero = false, ForceV2Zero = false;
assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
// TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
int M = TargetMask[i];
if (M == SM_SentinelUndef)
continue;
if (M == i)
continue;
if (M == i + Size) {
BlendMask |= 1ull << i;
continue;
}
if (M == SM_SentinelZero) {
if (V1IsZeroOrUndef) {
ForceV1Zero = true;
TargetMask[i] = i;
continue;
}
if (V2IsZeroOrUndef) {
ForceV2Zero = true;
BlendMask |= 1ull << i;
TargetMask[i] = i + Size;
continue;
}
}
return false;
}
return true;
}
uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, int Scale) {
uint64_t ScaledMask = 0;
for (int i = 0; i != Size; ++i)
if (BlendMask & (1ull << i))
ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
return ScaledMask;
}
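// Worked example (illustrative): a v4i64 blend mask 0b0101, scaled by 2 so
// it can be used with dword-granular VPBLENDD, becomes 0b00110011: each
// set bit expands into Scale consecutive set bits at position i * Scale.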
/// \brief Try to emit a blend instruction for a shuffle.
///
/// This doesn't do any checks for the availability of instructions for blending
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
BlendMask))
return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
if (ForceV1Zero)
V1 = getZeroVector(VT, Subtarget, DAG, DL);
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
case MVT::v4f64:
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
LLVM_FALLTHROUGH;
case MVT::v2i64:
case MVT::v4i32:
// If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
// that instruction.
if (Subtarget.hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
LLVM_FALLTHROUGH;
case MVT::v8i16: {
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
BlendMask = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
LLVM_FALLTHROUGH;
}
case MVT::v16i8:
case MVT::v32i8: {
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked =
lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
return Masked;
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
// This form of blend is always done on bytes. Compute the byte vector
// type.
MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
// Compute the VSELECT mask. Note that VSELECT is really confusing in the
// mix of LLVM's code generator and the x86 backend. We tell the code
// generator that boolean values in the elements of an x86 vector register
// are -1 for true and 0 for false. We then use the LLVM semantics of 'true'
// mapping a select to operand #1, and 'false' mapping to operand #2. The
// reality in x86 is that vector masks (pre-AVX-512) use only the high bit
// of the element (the remaining are ignored) and 0 in that high bit would
// mean operand #1 while 1 in the high bit would mean operand #2. So while
// the LLVM model for boolean values in vector elements gets the relevant
// bit set, it is set backwards and over-constrained relative to x86's
// actual model.
SmallVector<SDValue, 32> VSELECTMask;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
for (int j = 0; j < Scale; ++j)
VSELECTMask.push_back(
Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
: DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
MVT::i8));
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(
VT,
DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask),
V1, V2));
}
case MVT::v16f32:
case MVT::v8f64:
case MVT::v8i64:
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8: {
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
default:
llvm_unreachable("Not a supported integer vector type!");
}
}
/// \brief Try to lower as a blend of elements from two inputs followed by
/// a single-input permutation.
///
/// This matches the pattern where we can blend elements from two inputs and
/// then reduce the shuffle to a single-input permutation.
static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
SmallVector<int, 32> PermuteMask(Mask.size(), -1);
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] < 0)
continue;
assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");
if (BlendMask[Mask[i] % Size] < 0)
BlendMask[Mask[i] % Size] = Mask[i];
else if (BlendMask[Mask[i] % Size] != Mask[i])
return SDValue(); // Can't blend in the needed input!
PermuteMask[i] = Mask[i] % Size;
}
SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
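// Worked example (illustrative, v4i32): Mask = <1, 4, 3, 6> first blends
// with BlendMask = <4, 1, 6, 3> (lanes 0/2 from V2, lanes 1/3 from V1) and
// then applies the single-input PermuteMask = <1, 0, 3, 2> to the result.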
/// \brief Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
/// This matches the extremely common pattern for handling combined
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends.
static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
MVT VT, SDValue V1,
SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// Shuffle the input elements into the desired positions in V1 and V2 and
// blend them together.
SmallVector<int, 32> V1Mask(Mask.size(), -1);
SmallVector<int, 32> V2Mask(Mask.size(), -1);
SmallVector<int, 32> BlendMask(Mask.size(), -1);
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= 0 && Mask[i] < Size) {
V1Mask[i] = Mask[i];
BlendMask[i] = i;
} else if (Mask[i] >= Size) {
V2Mask[i] = Mask[i] - Size;
BlendMask[i] = i + Size;
}
// Try to lower with the simpler initial blend strategy unless one of the
// input shuffles would be a no-op. We prefer to shuffle inputs as the
// shuffle may be able to fold with a load or other benefit. However, when
// we'll have to do 2x as many shuffles in order to achieve this, blending
// first is a better strategy.
if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask))
if (SDValue BlendPerm =
lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
return BlendPerm;
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}
/// \brief Try to lower a vector shuffle as a rotation.
///
/// This is used to support PALIGNR for SSSE3 and VALIGND/Q for AVX512.
static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
int NumElts = Mask.size();
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
// [-1, 12, 13, 14, -1, -1, 1, -1]
// [-1, -1, -1, -1, -1, -1, 1, 2]
// [ 3, 4, 5, 6, 7, 8, 9, 10]
// [-1, 4, 5, 6, -1, -1, 9, -1]
// [-1, 4, 5, 6, -1, -1, -1, -1]
int Rotation = 0;
SDValue Lo, Hi;
for (int i = 0; i < NumElts; ++i) {
int M = Mask[i];
assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
"Unexpected mask index.");
if (M < 0)
continue;
// Determine where a rotated vector would have started.
int StartIdx = i - (M % NumElts);
if (StartIdx == 0)
// The identity rotation isn't interesting, stop.
return -1;
// If we found the tail of a vector (StartIdx < 0), the rotation is the
// number of missing front elements, -StartIdx. If we found the head of a
// vector, the rotation is how far the head was shifted up, NumElts - StartIdx.
int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
if (Rotation == 0)
Rotation = CandidateRotation;
else if (Rotation != CandidateRotation)
// The rotations don't match, so we can't match this mask.
return -1;
// Compute which value this mask is pointing at.
SDValue MaskV = M < NumElts ? V1 : V2;
// Compute which of the two target values this index should be assigned
// to. This reflects whether the high elements are remaining or the low
// elements are remaining.
SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
// Either set up this value if we've not encountered it before, or check
// that it remains consistent.
if (!TargetV)
TargetV = MaskV;
else if (TargetV != MaskV)
// This may be a rotation, but it pulls from the inputs in some
// unsupported interleaving.
return -1;
}
// Check that we successfully analyzed the mask, and normalize the results.
assert(Rotation != 0 && "Failed to locate a viable rotation!");
assert((Lo || Hi) && "Failed to find a rotated input vector!");
if (!Lo)
Lo = Hi;
else if (!Hi)
Hi = Lo;
V1 = Lo;
V2 = Hi;
return Rotation;
}
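// Worked example (illustrative, v8i16): Mask = <11, 12, 13, 14, 15, 0, 1, 2>
// matches with Rotation = 3: the first five elements are the tail of V2
// (StartIdx = -3, so Hi = V2) and the last three are the head of V1
// (StartIdx = 5, so Lo = V1).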
/// \brief Try to lower a vector shuffle as a byte rotation.
///
/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
/// try to generically lower a vector shuffle through such a pattern. It
/// does not check for the profitability of lowering either as PALIGNR or
/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
/// This matches shuffle vectors that look like:
///
/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask) {
// Don't accept any shuffles with zero elements.
if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
return -1;
// PALIGNR works on 128-bit lanes.
SmallVector<int, 16> RepeatedMask;
if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
return -1;
int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
if (Rotation <= 0)
return -1;
// PALIGNR rotates bytes, so we need to scale the
// rotation based on how many bytes are in the vector lane.
int NumElts = RepeatedMask.size();
int Scale = 16 / NumElts;
return Rotation * Scale;
}
static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
SDValue Lo = V1, Hi = V2;
int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask);
if (ByteRotation <= 0)
return SDValue();
// Cast the inputs to an i8 vector of the correct length to match PALIGNR
// or PSLLDQ/PSRLDQ.
MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
Lo = DAG.getBitcast(ByteVT, Lo);
Hi = DAG.getBitcast(ByteVT, Hi);
// SSSE3 targets can use the palignr instruction.
if (Subtarget.hasSSSE3()) {
assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
"512-bit PALIGNR requires BWI instructions");
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
DAG.getConstant(ByteRotation, DL, MVT::i8)));
}
assert(VT.is128BitVector() &&
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
assert(ByteVT == MVT::v16i8 &&
"SSE2 rotate lowering only needed for v16i8!");
// Default SSE2 implementation
int LoByteShift = 16 - ByteRotation;
int HiByteShift = ByteRotation;
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
DAG.getConstant(HiByteShift, DL, MVT::i8));
return DAG.getBitcast(VT,
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
/// \brief Try to lower a vector shuffle as a dword/qword rotation.
///
/// AVX512 has VALIGND/VALIGNQ instructions that will do an arbitrary
/// rotation of the concatenation of two vectors; this routine will
/// try to generically lower a vector shuffle through such a pattern.
///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
"Only 32-bit and 64-bit elements are supported!");
// 128/256-bit vectors are only supported with VLX.
assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
&& "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
if (Rotation <= 0)
return SDValue();
return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
DAG.getConstant(Rotation, DL, MVT::i8));
}
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
/// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
/// matches elements from one of the input vectors shuffled to the left or
/// right with zeroable elements 'shifted in'. It handles both the strictly
/// bit-wise element shifts and the byte shift across an entire 128-bit double
/// quad word lane.
///
/// PSLL : (little-endian) left bit shift.
/// [ zz, 0, zz, 2 ]
/// [ -1, 4, zz, -1 ]
/// PSRL : (little-endian) right bit shift.
/// [ 1, zz, 3, zz]
/// [ -1, -1, 7, zz]
/// PSLLDQ : (little-endian) left byte shift
/// [ zz, 0, 1, 2, 3, 4, 5, 6]
/// [ zz, zz, -1, -1, 2, 3, 4, -1]
/// [ zz, zz, zz, zz, zz, zz, -1, 1]
/// PSRLDQ : (little-endian) right byte shift
/// [ 5, 6, 7, zz, zz, zz, zz, zz]
/// [ -1, 5, 6, 7, zz, zz, zz, zz]
/// [ 1, 2, -1, -1, -1, -1, zz, zz]
static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
unsigned ScalarSizeInBits,
ArrayRef<int> Mask, int MaskOffset,
const APInt &Zeroable,
const X86Subtarget &Subtarget) {
int Size = Mask.size();
unsigned SizeInBits = Size * ScalarSizeInBits;
auto CheckZeros = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i < Size; i += Scale)
for (int j = 0; j < Shift; ++j)
if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
return false;
return true;
};
auto MatchShift = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i != Size; i += Scale) {
unsigned Pos = Left ? i + Shift : i;
unsigned Low = Left ? i : i + Shift;
unsigned Len = Scale - Shift;
if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset))
return -1;
}
int ShiftEltBits = ScalarSizeInBits * Scale;
bool ByteShift = ShiftEltBits > 64;
Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
: (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
// Normalize the scale for byte shifts to still produce an i64 element
// type.
Scale = ByteShift ? Scale / 2 : Scale;
// We need to round trip through the appropriate type for the shift.
MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
: MVT::getVectorVT(ShiftSVT, Size / Scale);
return (int)ShiftAmt;
};
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
// keep doubling the size of the integer elements up to that. We can
// then shift the elements of the integer vector by whole multiples of
// their width within the elements of the larger integer vector. Test each
// multiple to see if we can find a match with the moved element indices
// and that the shifted in elements are all zeroable.
unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
for (int Shift = 1; Shift != Scale; ++Shift)
for (bool Left : {true, false})
if (CheckZeros(Shift, Scale, Left)) {
int ShiftAmt = MatchShift(Shift, Scale, Left);
if (0 < ShiftAmt)
return ShiftAmt;
}
// No match.
return -1;
}
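// Worked example (illustrative): the v4i32 mask [zz, 0, zz, 2] matches with
// Scale == 2 and Shift == 1 (Left): positions 0 and 2 are zeroable and
// positions 1 and 3 hold the sequential indices 0 and 2. ShiftEltBits ==
// 64, so this is a bit shift rather than a byte shift: Opcode ==
// X86ISD::VSHLI, ShiftVT == v2i64 and ShiftAmt == 32, i.e. a PSLLQ by 32
// of the vector reinterpreted as two 64-bit elements.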
static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
MVT ShiftVT;
SDValue V = V1;
unsigned Opcode;
// Try to match shuffle against V1 shift.
int ShiftAmt = matchVectorShuffleAsShift(
ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget);
// If V1 failed, try to match shuffle against V2 shift.
if (ShiftAmt < 0) {
ShiftAmt =
matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
Mask, Size, Zeroable, Subtarget);
V = V2;
}
if (ShiftAmt < 0)
return SDValue();
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(Opcode, DL, ShiftVT, V,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
return DAG.getBitcast(VT, V);
}
// EXTRQ: Extract Len elements from lower half of source, starting at Idx.
// Remainder of lower half result is zero and upper half is all undef.
static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask, uint64_t &BitLen,
uint64_t &BitIdx, const APInt &Zeroable) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
return false;
// Determine the extraction length from the part of the
// lower half that isn't zeroable.
int Len = HalfSize;
for (; Len > 0; --Len)
if (!Zeroable[Len - 1])
break;
assert(Len > 0 && "Zeroable shuffle mask");
// Attempt to match first Len sequential elements from the lower half.
SDValue Src;
int Idx = -1;
for (int i = 0; i != Len; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
// The extracted elements must start at a valid index and all mask
// elements must be in the lower half.
if (i > M || M >= HalfSize)
return false;
if (Idx < 0 || (Src == V && Idx == (M - i))) {
Src = V;
Idx = M - i;
continue;
}
return false;
}
if (!Src || Idx < 0)
return false;
assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
V1 = Src;
return true;
}
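// Worked example (illustrative): for a v8i16 shuffle whose mask is
// [1, 2, 3, zz, u, u, u, u] (upper half undef, element 3 zeroable), Len ==
// 3 and Idx == 1, so BitLen == 3 * 16 == 48 and BitIdx == 1 * 16 == 16:
// EXTRQ extracts 48 bits starting at bit 16 of the source.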
// INSERTQ: Extract lowest Len elements from lower half of second source and
// insert over first source, starting at Idx.
// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
ArrayRef<int> Mask, uint64_t &BitLen,
uint64_t &BitIdx) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
return false;
for (int Idx = 0; Idx != HalfSize; ++Idx) {
SDValue Base;
// Attempt to match first source from mask before insertion point.
if (isUndefInRange(Mask, 0, Idx)) {
/* EMPTY */
} else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
Base = V1;
} else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
Base = V2;
} else {
continue;
}
// Extend the extraction length looking to match both the insertion of
// the second source and the remaining elements of the first.
for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
SDValue Insert;
int Len = Hi - Idx;
// Match insertion.
if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
Insert = V1;
} else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
Insert = V2;
} else {
continue;
}
// Match the remaining elements of the lower half.
if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
/* EMPTY */
} else if ((!Base || (Base == V1)) &&
isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
Base = V1;
} else if ((!Base || (Base == V2)) &&
isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
Size + Hi)) {
Base = V2;
} else {
continue;
}
BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
V1 = Base;
V2 = Insert;
return true;
}
}
return false;
}
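// Worked example (illustrative): the v8i16 mask [0, 8, 9, 3, u, u, u, u]
// matches with Base = V1 (elements 0 and 3 in place), Insert = V2, Idx == 1
// and Len == 2, so BitLen == 32 and BitIdx == 16: INSERTQ inserts the low
// 32 bits of V2 at bit offset 16 of V1.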
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
V2 ? V2 : DAG.getUNDEF(VT),
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
return SDValue();
}
/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and
/// can start from an offset element index in the input; to avoid excess
/// shuffling, the offset must either be in the bottom lane or at the
/// start of a higher lane. All extended elements must be from
/// the same lane.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int EltBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
int NumEltsPerLane = 128 / EltBits;
int OffsetLane = Offset / NumEltsPerLane;
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Only 8, 16, and 32 bit elements can be extended.");
assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
assert(0 <= Offset && "Extension offset must be positive.");
assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
"Extension offset must be in the first lane or start an upper lane.");
// Check that an index is in the same lane as the base offset.
auto SafeOffset = [&](int Idx) {
return OffsetLane == (Idx / NumEltsPerLane);
};
// Shift along an input so that the offset base moves to the first element.
auto ShuffleOffset = [&](SDValue V) {
if (!Offset)
return V;
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
for (int i = 0; i * Scale < NumElements; ++i) {
int SrcIdx = i + Offset;
ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
}
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
};
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget.hasSSE41()) {
// Not worth offsetting 128-bit vectors if scale == 2; a pattern using
// PUNPCK will catch this in a later shuffle match.
if (Offset && Scale == 2 && VT.is128BitVector())
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
// For any-extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
-1};
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
int PSHUFDMask[4] = {Offset / 2, -1,
SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFWMask[4] = {1, -1, -1, -1};
unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
return DAG.getBitcast(
VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, InputV),
getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
}
// The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
// to 64-bits.
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
assert(VT.is128BitVector() && "Unexpected vector width!");
int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(LoIdx, DL, MVT::i8)));
if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) ||
!SafeOffset(Offset + 1))
return DAG.getBitcast(VT, Lo);
int HiIdx = (Offset + 1) * EltBits;
SDValue Hi = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(HiIdx, DL, MVT::i8)));
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements, which also makes it easier to use pshufb.
if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
assert(NumElements == 16 && "Unexpected byte vector width!");
SDValue PSHUFBMask[16];
for (int i = 0; i < 16; ++i) {
int Idx = Offset + (i / Scale);
PSHUFBMask[i] = DAG.getConstant(
(i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8);
}
InputV = DAG.getBitcast(MVT::v16i8, InputV);
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask)));
}
// If we are extending from an offset, ensure we start on a boundary that
// we can unpack from.
int AlignToUnpack = Offset % (NumElements / Scale);
if (AlignToUnpack) {
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
for (int i = AlignToUnpack; i < NumElements; ++i)
ShMask[i - AlignToUnpack] = i;
InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
Offset -= AlignToUnpack;
}
// Otherwise emit a sequence of unpacks.
do {
unsigned UnpackLoHi = X86ISD::UNPCKL;
if (Offset >= (NumElements / 2)) {
UnpackLoHi = X86ISD::UNPCKH;
Offset -= (NumElements / 2);
}
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
InputV = DAG.getBitcast(InputVT, InputV);
InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
} while (Scale > 1);
return DAG.getBitcast(VT, InputV);
}
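// Worked example of the final unpack loop (illustrative): zero-extending
// v16i8 to v4i32 on plain SSE2 (Scale == 4, Offset == 0) emits PUNPCKLBW
// with a zero vector (bytes become zero-extended words, Scale becomes 2),
// then PUNPCKLWD with a zero vector (words become zero-extended dwords,
// Scale becomes 1), and finally bitcasts back to VT.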
/// \brief Try to lower a vector shuffle as a zero extension on any microarch.
///
/// This routine will try to do everything in its power to cleverly lower
/// a shuffle which happens to match the pattern of a zero extend. It doesn't
/// check for the profitability of this lowering; it tries to aggressively
/// match this pattern. It will use all of the micro-architectural details it
/// can to emit an efficient lowering. It handles both blends with all-zero
/// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
/// masking out later).
///
/// The reason we have dedicated lowering for zext-style shuffles is that they
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
int NumEltsPerLane = NumElements / NumLanes;
assert(VT.getScalarSizeInBits() <= 32 &&
"Exceeds 32-bit integer zero extension limit");
assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
// Define a helper function to check a particular ext-scale and lower to it if
// valid.
auto Lower = [&](int Scale) -> SDValue {
SDValue InputV;
bool AnyExt = true;
int Offset = 0;
int Matches = 0;
for (int i = 0; i < NumElements; ++i) {
int M = Mask[i];
if (M < 0)
continue; // Valid anywhere but doesn't tell us anything.
if (i % Scale != 0) {
// Each of the extended elements needs to be zeroable.
if (!Zeroable[i])
return SDValue();
// We no longer are in the anyext case.
AnyExt = false;
continue;
}
// The base elements need to be consecutive indices into the same input
// vector.
SDValue V = M < NumElements ? V1 : V2;
M = M % NumElements;
if (!InputV) {
InputV = V;
Offset = M - (i / Scale);
} else if (InputV != V)
return SDValue(); // Flip-flopping inputs.
// Offset must start in the lowest 128-bit lane or at the start of an
// upper lane.
// FIXME: Is it ever worth allowing a negative base offset?
if (!((0 <= Offset && Offset < NumEltsPerLane) ||
(Offset % NumEltsPerLane) == 0))
return SDValue();
// If we are offsetting, all referenced entries must come from the same
// lane.
if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
return SDValue();
if ((M % NumElements) != (Offset + (i / Scale)))
return SDValue(); // Non-consecutive strided elements.
Matches++;
}
// If we fail to find an input, we have a zero-shuffle which should always
// have already been handled.
// FIXME: Maybe handle this here in case during blending we end up with one?
if (!InputV)
return SDValue();
// If we are offsetting, don't extend if we only match a single input; we
// can always do better by using a basic PSHUF or PUNPCK.
if (Offset != 0 && Matches < 2)
return SDValue();
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
assert(Bits % 64 == 0 &&
"The number of bits in a vector must be divisible by 64 on x86!");
int NumExtElements = Bits / 64;
// Each iteration, try extending the elements half as much, but into twice as
// many elements.
for (; NumExtElements < NumElements; NumExtElements *= 2) {
assert(NumElements % NumExtElements == 0 &&
"The input vector size must be divisible by the extended size.");
if (SDValue V = Lower(NumElements / NumExtElements))
return V;
}
// General extends failed, but 128-bit vectors may be able to use MOVQ.
if (Bits != 128)
return SDValue();
// Returns one of the source operands if the shuffle can be reduced to a
// MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits.
auto CanZExtLowHalf = [&]() {
for (int i = NumElements / 2; i != NumElements; ++i)
if (!Zeroable[i])
return SDValue();
if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
return V1;
if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
return V2;
return SDValue();
};
if (SDValue V = CanZExtLowHalf()) {
V = DAG.getBitcast(MVT::v2i64, V);
V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
return DAG.getBitcast(VT, V);
}
// No viable ext lowering found.
return SDValue();
}
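// Worked example (illustrative): for a v4i32 shuffle with mask
// [0, zz, 1, zz], the first attempted scale is Scale == 2: element 0 picks
// InputV = V1 with Offset == 0, element 2 continues the stride
// (1 == Offset + 2 / 2), and the odd elements are zeroable, so AnyExt is
// false and on SSE4.1 this becomes a PMOVZXDQ-style VZEXT from v4i32 to
// v2i64.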
/// \brief Try to get a scalar value for a specific element of a vector.
///
/// Looks through BUILD_VECTOR and SCALAR_TO_VECTOR nodes to find a scalar.
static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
SelectionDAG &DAG) {
MVT VT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
V = peekThroughBitcasts(V);
// If the bitcasts shift the element size, we can't extract an equivalent
// element from it.
MVT NewVT = V.getSimpleValueType();
if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) {
// Ensure the scalar operand is the same size as the destination.
// FIXME: Add support for scalar truncation where possible.
SDValue S = V.getOperand(Idx);
if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits())
return DAG.getBitcast(EltVT, S);
}
return SDValue();
}
/// \brief Helper to test for a load that can be folded with x86 shuffles.
///
/// This is particularly important because the set of instructions varies
/// significantly based on whether the operand is a load or not.
static bool isShuffleFoldableLoad(SDValue V) {
V = peekThroughBitcasts(V);
return ISD::isNON_EXTLoad(V.getNode());
}
/// \brief Try to lower insertion of a single element into a zero vector.
///
/// This is a common pattern that we have especially efficient ways to lower
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
int V2Index =
find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
Mask.begin();
bool IsV1Zeroable = true;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (i != V2Index && !Zeroable[i]) {
IsV1Zeroable = false;
break;
}
// Check for a single input from a SCALAR_TO_VECTOR node.
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
DAG);
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
if (!IsV1Zeroable)
return SDValue();
// Zero-extend directly to i32.
ExtVT = MVT::v4i32;
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
EltVT == MVT::i16) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
}
if (!IsV1Zeroable) {
// If V1 can't be treated as a zero vector we have fewer options to lower
// this. We can't support integer vectors or non-zero targets cheaply, and
// the V1 elements can't be permuted in any way.
assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
if (!VT.isFloatingPoint() || V2Index != 0)
return SDValue();
SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
V1Mask[V2Index] = -1;
if (!isNoopShuffleMask(V1Mask))
return SDValue();
// This is essentially a special case blend operation, but if we have
// general purpose blend operations, they are always faster. Bail and let
// the rest of the lowering handle these as blends.
if (Subtarget.hasSSE41())
return SDValue();
// Otherwise, use MOVSD or MOVSS.
assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
"Only two types of floating point element types to handle!");
return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
ExtVT, V1, V2);
}
// This lowering only works for the low element with floating point vectors.
if (VT.isFloatingPoint() && V2Index != 0)
return SDValue();
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getBitcast(VT, V2);
if (V2Index != 0) {
// If we have 4 or fewer lanes we can cheaply shuffle the element into
// the desired position. Otherwise it is more efficient to do a vector
// shift left. We know that we can do a vector shift left because all
// the inputs are zero.
if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
V2 = DAG.getBitcast(MVT::v16i8, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL,
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(
DAG.getDataLayout(), VT)));
V2 = DAG.getBitcast(VT, V2);
}
}
return V2;
}
/// Try to lower broadcast of a single - truncated - integer element,
/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements.
///
/// This assumes we have AVX2.
static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
SDValue V0, int BroadcastIdx,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX2() &&
"We can only lower integer broadcasts with AVX2!");
EVT EltVT = VT.getVectorElementType();
EVT V0VT = V0.getValueType();
assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");
EVT V0EltVT = V0VT.getVectorElementType();
if (!V0EltVT.isInteger())
return SDValue();
const unsigned EltSize = EltVT.getSizeInBits();
const unsigned V0EltSize = V0EltVT.getSizeInBits();
// This is only a truncation if the original element type is larger.
if (V0EltSize <= EltSize)
return SDValue();
assert(((V0EltSize % EltSize) == 0) &&
"Scalar type sizes must all be powers of 2 on x86!");
const unsigned V0Opc = V0.getOpcode();
const unsigned Scale = V0EltSize / EltSize;
const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
V0Opc != ISD::BUILD_VECTOR)
return SDValue();
SDValue Scalar = V0.getOperand(V0BroadcastIdx);
// If we're extracting non-least-significant bits, shift so we can truncate.
// Hopefully, we can fold away the trunc/srl/load into the broadcast.
// Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
// vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
if (const int OffsetIdx = BroadcastIdx % Scale)
Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType()));
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}
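// Worked example (illustrative): splatting byte 5 of a v4i32 build_vector
// as v16i8 gives Scale == 32 / 8 == 4, V0BroadcastIdx == 5 / 4 == 1 and
// OffsetIdx == 5 % 4 == 1, so operand 1 of the build_vector is shifted
// right by 8 bits, truncated to i8 and fed to VBROADCAST.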
/// \brief Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
(Subtarget.hasAVX() && VT.isFloatingPoint()) ||
(Subtarget.hasAVX2() && VT.isInteger())))
return SDValue();
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
// we can only broadcast from a register with AVX2.
unsigned NumElts = Mask.size();
unsigned Opcode = VT == MVT::v2f64 ? X86ISD::MOVDDUP : X86ISD::VBROADCAST;
bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();
// Check that the mask is a broadcast.
int BroadcastIdx = -1;
for (int i = 0; i != (int)NumElts; ++i) {
SmallVector<int, 8> BroadcastMask(NumElts, i);
if (isShuffleEquivalent(V1, V2, Mask, BroadcastMask)) {
BroadcastIdx = i;
break;
}
}
if (BroadcastIdx < 0)
return SDValue();
assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
"a sorted mask where the broadcast "
"comes from V1.");
// Go up the chain of (vector) values to find a scalar load that we can
// combine with the broadcast.
SDValue V = V1;
for (;;) {
switch (V.getOpcode()) {
case ISD::BITCAST: {
SDValue VSrc = V.getOperand(0);
MVT SrcVT = VSrc.getSimpleValueType();
if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits())
break;
V = VSrc;
continue;
}
case ISD::CONCAT_VECTORS: {
int OperandSize = Mask.size() / V.getNumOperands();
V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;
continue;
}
case ISD::INSERT_SUBVECTOR: {
SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (!ConstantIdx)
break;
int BeginIdx = (int)ConstantIdx->getZExtValue();
int EndIdx =
BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
BroadcastIdx -= BeginIdx;
V = VInner;
} else {
V = VOuter;
}
continue;
}
}
break;
}
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
// First, look through bitcast: if the original value has a larger element
// type than the shuffle, the broadcast element is in essence truncated.
// Make that explicit to ease folding.
if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
MVT BroadcastVT = VT;
// Peek through any bitcast (only useful for loads).
SDValue BC = peekThroughBitcasts(V);
// Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
// If we can't broadcast from a register, check that the input is a load.
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
return SDValue();
} else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
Opcode = (BroadcastVT.is128BitVector() ? X86ISD::MOVDDUP : Opcode);
}
// If we are broadcasting a load that is only used by the shuffle
// then we can reduce the vector load to the broadcasted scalar load.
LoadSDNode *Ld = cast<LoadSDNode>(BC);
SDValue BaseAddr = Ld->getOperand(1);
EVT SVT = BroadcastVT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(Ld, V);
} else if (!BroadcastFromReg) {
// We can't broadcast from a vector register.
return SDValue();
} else if (BroadcastIdx != 0) {
// We can only broadcast from the zero-element of a vector register,
// but it can be advantageous to broadcast from the zero-element of a
// subvector.
if (!VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
// VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return SDValue();
// Only broadcast the zero-element of a 128-bit subvector.
unsigned EltSize = VT.getScalarSizeInBits();
if (((BroadcastIdx * EltSize) % 128) != 0)
return SDValue();
// The shuffle input might have been a bitcast we looked through; look at
// the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
// later bitcast it to BroadcastVT.
MVT SrcVT = V.getSimpleValueType();
assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
"Unexpected vector element size");
assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) &&
"Unexpected vector size");
MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
DAG.getIntPtrConstant(BroadcastIdx, DL));
}
if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
DAG.getBitcast(MVT::f64, V));
// Bitcast back to the same scalar type as BroadcastVT.
MVT SrcVT = V.getSimpleValueType();
if (SrcVT.getScalarType() != BroadcastVT.getScalarType()) {
assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
"Unexpected vector element size");
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
SrcVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
} else {
SrcVT = BroadcastVT.getScalarType();
}
V = DAG.getBitcast(SrcVT, V);
}
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
V = DAG.getBitcast(MVT::f64, V);
unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
}
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
// 128-bits.
if (SrcVT.getSizeInBits() > 128)
V = extract128BitVector(V, 0, DAG, DL);
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
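// Illustrative example of the load-narrowing path above: if V1 is a
// non-volatile, single-use v4f32 load and the mask splats element 2, the
// vector load is replaced by a scalar load at BaseAddr plus 8 bytes and
// that scalar is broadcast, so the full-width load disappears.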
// Check for whether we can use INSERTPS to perform the shuffle. We only use
// INSERTPS when the V1 elements are already in the correct locations
// because otherwise we can just always use two SHUFPS instructions which
// are much smaller to encode than a SHUFPS and an INSERTPS. We can also
// perform INSERTPS if a single V1 element is out of place and all V2
// elements are zeroable.
static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
const APInt &Zeroable,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
// Attempt to match INSERTPS with one element from VA or VB being
// inserted into VA (or undef). If successful, V1, V2 and InsertPSMask
// are updated.
auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
ArrayRef<int> CandidateMask) {
unsigned ZMask = 0;
int VADstIndex = -1;
int VBDstIndex = -1;
bool VAUsedInPlace = false;
for (int i = 0; i < 4; ++i) {
// Synthesize a zero mask from the zeroable elements (includes undefs).
if (Zeroable[i]) {
ZMask |= 1 << i;
continue;
}
// Flag if we use any VA inputs in place.
if (i == CandidateMask[i]) {
VAUsedInPlace = true;
continue;
}
// We can only insert a single non-zeroable element.
if (VADstIndex >= 0 || VBDstIndex >= 0)
return false;
if (CandidateMask[i] < 4) {
// VA input out of place for insertion.
VADstIndex = i;
} else {
// VB input for insertion.
VBDstIndex = i;
}
}
// Don't bother if we have no (non-zeroable) element for insertion.
if (VADstIndex < 0 && VBDstIndex < 0)
return false;
// Determine element insertion src/dst indices. The src index is from the
// start of the inserted vector, not the start of the concatenated vector.
unsigned VBSrcIndex = 0;
if (VADstIndex >= 0) {
// If we have a VA input out of place, we use VA as the V2 element
// insertion and don't use the original V2 at all.
VBSrcIndex = CandidateMask[VADstIndex];
VBDstIndex = VADstIndex;
VB = VA;
} else {
VBSrcIndex = CandidateMask[VBDstIndex] - 4;
}
// If no V1 inputs are used in place, then the result is created only from
// the zero mask and the V2 insertion - so remove V1 dependency.
if (!VAUsedInPlace)
VA = DAG.getUNDEF(MVT::v4f32);
// Update V1, V2 and InsertPSMask accordingly.
V1 = VA;
V2 = VB;
// Insert the V2 element into the desired position.
InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
return true;
};
if (matchAsInsertPS(V1, V2, Mask))
return true;
// Commute and try again.
SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
ShuffleVectorSDNode::commuteMask(CommutedMask);
if (matchAsInsertPS(V2, V1, CommutedMask))
return true;
return false;
}
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
// Attempt to match the insertps pattern.
unsigned InsertPSMask;
if (!matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
return SDValue();
// Insert the V2 element into the desired position.
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
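// Worked example (illustrative): for the v4f32 mask [0, 1, 5, zz] (element
// 3 zeroable), elements 0 and 1 of V1 stay in place, element 1 of V2 (mask
// index 5) is inserted into lane 2 and lane 3 is zeroed, giving
// InsertPSMask == (1 << 6) | (2 << 4) | 0b1000 == 0x68.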
/// \brief Try to lower a shuffle as a permute of the inputs followed by an
/// UNPCK instruction.
///
/// This specifically targets cases where we end up with alternating between
/// the two inputs, and so can permute them into something that feeds a single
/// UNPCK instruction. Note that this routine only targets integer vectors
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(VT.is128BitVector() &&
"This routine only works on 128-bit vectors.");
assert(!V2.isUndef() &&
"This routine should only be used when blending two inputs.");
assert(Mask.size() >= 2 && "Single element masks are invalid.");
int Size = Mask.size();
int NumLoInputs =
count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; });
int NumHiInputs =
count_if(Mask, [Size](int M) { return M % Size >= Size / 2; });
bool UnpackLo = NumLoInputs >= NumHiInputs;
auto TryUnpack = [&](int ScalarSize, int Scale) {
SmallVector<int, 16> V1Mask((unsigned)Size, -1);
SmallVector<int, 16> V2Mask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
// Each element of the unpack contains Scale elements from this mask.
int UnpackIdx = i / Scale;
// We only handle the case where V1 feeds the first slots of the unpack.
// We rely on canonicalization to ensure this is the case.
if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
return SDValue();
// Setup the mask for this input. The indexing is tricky as we have to
// handle the unpack stride.
SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
Mask[i] % Size;
}
// If we will have to shuffle both inputs to use the unpack, check whether
// we can just unpack first and shuffle the result. If so, skip this unpack.
if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
!isNoopShuffleMask(V2Mask))
return SDValue();
// Shuffle the inputs into place.
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
// Cast the inputs to the type we will use to unpack them.
MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale);
V1 = DAG.getBitcast(UnpackVT, V1);
V2 = DAG.getBitcast(UnpackVT, V2);
// Unpack the inputs and cast the result back to the desired type.
return DAG.getBitcast(
VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
UnpackVT, V1, V2));
};
// We try each unpack from the largest to the smallest to try and find one
// that fits this mask.
int OrigScalarSize = VT.getScalarSizeInBits();
for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
return Unpack;
// If none of the unpack-rooted lowerings worked (or were profitable) try an
// initial unpack.
if (NumLoInputs == 0 || NumHiInputs == 0) {
assert((NumLoInputs > 0 || NumHiInputs > 0) &&
"We have to have *some* inputs!");
int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;
// FIXME: We could consider the total complexity of the permute of each
// possible unpacking. Or at the least we should consider how many
// half-crossings are created.
// FIXME: We could consider commuting the unpacks.
SmallVector<int, 32> PermMask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");
PermMask[i] =
2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
}
return DAG.getVectorShuffle(
VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
DL, VT, V1, V2),
DAG.getUNDEF(VT), PermMask);
}
return SDValue();
}
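// Worked example (illustrative): the v8i16 mask [0, 8, 2, 10, 4, 12, 6, 14]
// alternates between the two inputs, so TryUnpack with Scale == 1 permutes
// both V1 and V2 by [0, 2, 4, 6, -1, -1, -1, -1] and then emits a single
// PUNPCKLWD, which interleaves the permuted vectors into the requested
// order.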
/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// This is the basis function for the 2-lane 64-bit shuffles as we have full
/// support for floating point shuffles but not integer shuffles. These
/// instructions will incur a domain crossing penalty on some chips though so
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. Simulate this by using the
// single input as both of the "inputs" to this instruction.
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
return DAG.getNode(
X86ISD::SHUFP, DL, MVT::v2f64,
Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
// If we have a single input, insert that into V1 if we can do so cheaply.
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
}
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
if (isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
isShuffleEquivalent(V1, V2, Mask, {1, 3}))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
return DAG.getNode(
isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD,
DL, MVT::v2f64, V2,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
return V;
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
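// Illustrative example of the final SHUFPD immediate: if none of the
// earlier patterns fire (e.g. plain SSE2 with generic inputs), the mask
// [0, 3] yields SHUFPDMask == (0 == 1) | (((3 - 2) == 1) << 1) == 2,
// selecting element 0 of V1 and element 1 of V2.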
/// \brief Handle lowering of 2-lane 64-bit integer shuffles.
///
/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
/// the integer unit to minimize domain crossing penalties. However, for blends
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We have to map the mask as it is actually a v4i32 shuffle instruction.
V1 = DAG.getBitcast(MVT::v4i32, V1);
int WidenedMask[4] = {
std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
return DAG.getBitcast(
MVT::v2i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
}
assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[0] < 2 && "We sort V1 to be the first input.");
assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
// If we have a blend of two same-type PACKUS operations and the blend aligns
// with the low and high halves, we can just merge the PACKUS operations.
// This is particularly important as it lets us merge shuffles that this
// routine itself creates.
auto GetPackNode = [](SDValue V) {
V = peekThroughBitcasts(V);
return V.getOpcode() == X86ISD::PACKUS ? V : SDValue();
};
if (SDValue V1Pack = GetPackNode(V1))
if (SDValue V2Pack = GetPackNode(V2)) {
EVT PackVT = V1Pack.getValueType();
if (PackVT == V2Pack.getValueType())
return DAG.getBitcast(MVT::v2i64,
DAG.getNode(X86ISD::PACKUS, DL, PackVT,
Mask[0] == 0 ? V1Pack.getOperand(0)
: V1Pack.getOperand(1),
Mask[1] == 2 ? V2Pack.getOperand(0)
: V2Pack.getOperand(1)));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
Mask, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
// have this problem. It would be really nice if x86 had better shuffles here.
V1 = DAG.getBitcast(MVT::v2f64, V1);
V2 = DAG.getBitcast(MVT::v2f64, V2);
return DAG.getBitcast(MVT::v2i64,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
/// \brief Test whether this can be lowered with a single SHUFPS instruction.
///
/// This is used to disable more specialized lowerings when the shufps lowering
/// will happen to be efficient.
static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
// This routine only handles 128-bit shufps.
assert(Mask.size() == 4 && "Unsupported mask size!");
assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
// To lower with a single SHUFPS we need to have the low half and high half
// each requiring a single input.
if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
return false;
if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
return false;
return true;
}
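// Illustrative examples: [0, 1, 4, 5] is a single SHUFPS (the low half
// reads only V1 and the high half only V2), while [0, 4, 1, 5] is not,
// because its low half needs elements from both inputs.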
/// \brief Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
int V2AdjIndex = V2Index ^ 1;
if (Mask[V2AdjIndex] < 0) {
// Handles all the cases where we have a single V2 element and an undef.
// This will only ever happen in the high lanes because we commute the
// vector otherwise.
if (V2Index < 2)
std::swap(LowV, HighV);
NewMask[V2Index] -= 4;
} else {
// Handle the case where the V2 element ends up adjacent to a V1 element.
// To make this work, blend them together as the first step.
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now proceed to reconstruct the final blend as we have the necessary
// high or low half formed.
if (V2Index < 2) {
LowV = V2;
HighV = V1;
} else {
HighV = V2;
}
NewMask[V1Index] = 2; // We put the V1 element in V2[2].
NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
}
} else if (NumV2Elements == 2) {
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
// high lanes.
NewMask[2] -= 4;
NewMask[3] -= 4;
} else if (Mask[2] < 4 && Mask[3] < 4) {
// We also handle the reversed case because this utility may get called
// when we detect a SHUFPS pattern but can't easily commute the shuffle to
// arrange things in the right direction.
NewMask[0] -= 4;
NewMask[1] -= 4;
HighV = V1;
LowV = V2;
} else {
// We have a mixture of V1 and V2 in both low and high lanes. Rather than
// trying to place elements directly, just blend them and set up the final
// shuffle to place them.
// The first two blend mask elements are for V1, the second two are for
// V2.
int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
Mask[2] < 4 ? Mask[2] : Mask[3],
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));
// Now we do a normal shuffle of V1 by giving V1 as both operands to
// a blend.
LowV = HighV = V1;
NewMask[0] = Mask[0] < 4 ? 0 : 2;
NewMask[1] = Mask[0] < 4 ? 2 : 0;
NewMask[2] = Mask[2] < 4 ? 1 : 3;
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
}
return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}
/// \brief Lower 4-lane 32-bit floating point shuffles.
///
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget.hasSSE3()) {
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
if (Subtarget.hasAVX()) {
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Otherwise, use a straight shuffle of a single input vector. We pass the
// input vector to both operands to simulate this with a SHUFPS.
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// There are special ways we can lower some single-element blends. However, we
// have custom ways we can lower more complex single-element blends below that
// we defer to if both this and BLENDPS fail to match, so restrict this to
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use INSERTPS if we can complete the shuffle efficiently.
if (SDValue V =
lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
return V;
if (!isSingleSHUFPSMask(Mask))
if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
DL, MVT::v4f32, V1, V2, Mask, DAG))
return BlendPerm;
}
// Use low/high mov instructions.
if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
return V;
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
/// \brief Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
// onward this has a single fast instruction with no scary immediates.
// We coerce the shuffle pattern to be compatible with UNPCK instructions
// but we aren't actually going to use the UNPCK instruction because doing
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 1, 1}))
Mask = UnpackLoMask;
else if (isShuffleEquivalent(V1, V2, Mask, {2, 2, 3, 3}))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (!isSingleSHUFPSMask(Mask)) {
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
Mask, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
DL, MVT::v4i32, V1, V2, Mask, DAG))
return Unpack;
}
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
return DAG.getBitcast(MVT::v4i32, ShufPS);
}
/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
/// shuffle lowering, and the most complex part.
///
/// The lowering strategy is to try to form pairs of input lanes which are
/// targeted at the same half of the final vector, and then use a dword shuffle
/// to place them onto the right half, and finally unpack the paired lanes into
/// their final position.
///
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
///
/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
/// vector, form the analogous 128-bit 8-element Mask.
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
SmallVector<int, 4> LoInputs;
copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
SmallVector<int, 4> HiInputs;
copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
int NumLToL =
std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
int NumHToL = LoInputs.size() - NumLToL;
int NumLToH =
std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
int NumHToH = HiInputs.size() - NumLToH;
MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
// If we are splatting two values from one half - one to each half, then
// we can shuffle that half so each is splatted to a dword, then splat those
// to their respective halves.
auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp,
int DOffset) {
int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4};
int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1};
V = DAG.getNode(ShufWOp, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
V = DAG.getBitcast(PSHUFDVT, V);
V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V,
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
return DAG.getBitcast(VT, V);
};
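// For example (illustrative): Mask = [0,0,0,0,2,2,2,2] has NumLToL == 1 and
// NumLToH == 1, so SplatHalfs(0, 2, PSHUFLW, 0) first forms the low half
// [w0,w0,w2,w2] via PSHUFLW{0,0,2,2}, then PSHUFD{0,0,1,1} splats dword 0
// across the low half and dword 1 across the high half, yielding
// [w0,w0,w0,w0,w2,w2,w2,w2].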
if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0)
return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0);
if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0)
return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2);
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
// with <= 2 inputs to each half from each half. Once there, we can fall through
// to the generic code below. For example:
//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
//
// However in some very rare cases we have a 1-into-3 or 3-into-1 on one half
// and an existing 2-into-2 on the other half. In this case we may have to
// pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or
// 1-into-3 which could cause us to cycle endlessly fixing each side in turn.
// Fortunately, we don't have to handle anything but a 2-into-2 pattern
// because any other situation (including a 3-into-1 or 1-into-3 in the other
// half than the one we target for fixing) will be fixed when we re-enter this
// path. We will also combine any resulting sequence of PSHUFD instructions
// into a single instruction. Here is an example of the tricky case:
//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]
//
// This now has a 1-into-3 in the high half! Instead, we do two shuffles:
//
// Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]
//
// Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]
// Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]
//
// The result is fine to be handled by the generic logic.
auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
int AOffset, int BOffset) {
assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
"Must call this with A having 3 or 1 inputs from the A half.");
assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
"Must call this with B having 1 or 3 inputs from the B half.");
assert(AToAInputs.size() + BToAInputs.size() == 4 &&
"Must call this with either 3:1 or 1:3 inputs (summing to 4).");
bool ThreeAInputs = AToAInputs.size() == 3;
// Compute the index of dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
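// For example (illustrative): with AOffset == 0 and TripleInputs == {0,1,3},
// TripleInputSum == 6 and the inputs sum to 4, so the remaining slot is
// word 2 and TripleDWord == 1.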
int ADWord, BDWord;
int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
TripleDWord = TripleNonInputIdx / 2;
// We use xor with one to compute the adjacent DWord to whichever one the
// OneInput is in.
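// For example (illustrative): if OneInput is word 5, it lives in dword 2,
// so OneInputDWord becomes 3.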
OneInputDWord = (OneInput / 2) ^ 1;
// Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA
// and BToA inputs. If there is also such a problem with the BToB and AToB
// inputs, we don't try to fix it necessarily -- we'll recurse and see it in
// the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it
// is essential that we don't *create* a 3<-1 as then we might oscillate.
if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
// Compute how many inputs will be flipped by swapping these DWords. We
// need to balance this to ensure we don't form a 3-1 shuffle in the
// other half.
int NumFlippedAToBInputs =
std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
int NumFlippedBToBInputs =
std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
if ((NumFlippedAToBInputs == 1 &&
(NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
(NumFlippedBToBInputs == 1 &&
(NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
// We choose whether to fix the A half or B half based on whether that
// half has zero flipped inputs. At zero, we may not be able to fix it
// with that half. We also bias towards fixing the B half because that
// will more commonly be the high half, and we have to bias one way.
auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
ArrayRef<int> Inputs) {
int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
// Determine whether the free index is in the flipped dword or the
// unflipped dword based on where the pinned index is. We use this bit
// in an xor to conditionally select the adjacent dword.
int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
if (IsFixIdxInput == IsFixFreeIdxInput)
FixFreeIdx += 1;
IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
assert(IsFixIdxInput != IsFixFreeIdxInput &&
"We need to be changing the number of flipped inputs!");
int PSHUFHalfMask[] = {0, 1, 2, 3};
std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
V = DAG.getNode(
FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
for (int &M : Mask)
if (M >= 0 && M == FixIdx)
M = FixFreeIdx;
else if (M >= 0 && M == FixFreeIdx)
M = FixIdx;
};
if (NumFlippedBToBInputs != 0) {
int BPinnedIdx =
BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
} else {
assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
}
}
}
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
V = DAG.getBitcast(
VT,
DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// Adjust the mask to match the new locations of A and B.
for (int &M : Mask)
if (M >= 0 && M/2 == ADWord)
M = 2 * BDWord + M % 2;
else if (M >= 0 && M/2 == BDWord)
M = 2 * ADWord + M % 2;
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
// each half. That means the inputs can always be grouped into dwords and
// those dwords can then be moved to the correct half with a dword shuffle.
// We use at most one low and one high word shuffle to collect these paired
// inputs into dwords, and finally a dword shuffle to place them.
int PSHUFLMask[4] = {-1, -1, -1, -1};
int PSHUFHMask[4] = {-1, -1, -1, -1};
int PSHUFDMask[4] = {-1, -1, -1, -1};
// First fix the masks for all the inputs that are staying in their
// original halves. This will then dictate the targets of the cross-half
// shuffles.
auto fixInPlaceInputs =
[&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
MutableArrayRef<int> SourceHalfMask,
MutableArrayRef<int> HalfMask, int HalfOffset) {
if (InPlaceInputs.empty())
return;
if (InPlaceInputs.size() == 1) {
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
return;
}
if (IncomingInputs.empty()) {
// Just fix all of the in place inputs.
for (int Input : InPlaceInputs) {
SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
PSHUFDMask[Input / 2] = Input / 2;
}
return;
}
assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
// Put the second input next to the first so that they are packed into
// a dword. We find the adjacent index by toggling the low bit.
int AdjIndex = InPlaceInputs[0] ^ 1;
SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
};
fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
// Now gather the cross-half inputs and place them into a free dword of
// their target half.
// FIXME: This operation could almost certainly be simplified dramatically to
// look more like the 3-1 fixing operation.
auto moveInputsToRightHalf = [&PSHUFDMask](
MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
};
auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
int Word) {
int LowWord = Word & ~1;
int HighWord = Word | 1;
return isWordClobbered(SourceHalfMask, LowWord) ||
isWordClobbered(SourceHalfMask, HighWord);
};
if (IncomingInputs.empty())
return;
if (ExistingInputs.empty()) {
// Map any dwords with inputs from them into the right half.
for (int Input : IncomingInputs) {
// If the source half mask maps over the inputs, turn those into
// swaps and use the swapped lane.
if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
Input - SourceOffset;
// We have to swap the uses in our half mask in one sweep.
for (int &M : HalfMask)
if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
M = Input;
else if (M == Input)
M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
} else {
assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
Input - SourceOffset &&
"Previous placement doesn't match!");
}
// Note that this correctly re-maps both when we do a swap and when
// we observe the other side of the swap above. We rely on that to
// avoid swapping the members of the input list directly.
Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
}
// Map the input's dword into the correct half.
if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
else
assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
Input / 2 &&
"Previous placement doesn't match!");
}
// And just directly shift any other-half mask elements to be same-half
// as we will have mirrored the dword containing the element into the
// same position within that half.
for (int &M : HalfMask)
if (M >= SourceOffset && M < SourceOffset + 4) {
M = M - SourceOffset + DestOffset;
assert(M >= 0 && "This should never wrap below zero!");
}
return;
}
// Ensure we have the input in a viable dword of its current half. This
// is particularly tricky because the original position may be clobbered
// by inputs being moved and *staying* in that half.
if (IncomingInputs.size() == 1) {
if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
SourceOffset;
SourceHalfMask[InputFixed - SourceOffset] =
IncomingInputs[0] - SourceOffset;
std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
InputFixed);
IncomingInputs[0] = InputFixed;
}
} else if (IncomingInputs.size() == 2) {
if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
// We have two non-adjacent or clobbered inputs we need to extract from
// the source half. To do this, we need to map them into some adjacent
// dword slot in the source mask.
int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
IncomingInputs[1] - SourceOffset};
// If there is a free slot in the source half mask adjacent to one of
// the inputs, place the other input in it. We use (Index XOR 1) to
// compute an adjacent index.
if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
InputsFixed[1] = InputsFixed[0] ^ 1;
} else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
InputsFixed[0] = InputsFixed[1] ^ 1;
} else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
// The two inputs are in the same DWord but it is clobbered and the
// adjacent DWord isn't used at all. Move both inputs to the free
// slot.
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
} else {
// The only way we hit this point is if there is no clobbering
// (because there are no off-half inputs to this half) and there is no
// free slot adjacent to one of the inputs. In this case, we have to
// swap an input with a non-input.
for (int i = 0; i < 4; ++i)
assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
"We can't handle any clobbers here!");
assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
"Cannot have adjacent inputs here!");
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
// We also have to update the final source mask in this case because
// it may need to undo the above swap.
for (int &M : FinalSourceHalfMask)
if (M == (InputsFixed[0] ^ 1) + SourceOffset)
M = InputsFixed[1] + SourceOffset;
else if (M == InputsFixed[1] + SourceOffset)
M = (InputsFixed[0] ^ 1) + SourceOffset;
InputsFixed[1] = InputsFixed[0] ^ 1;
}
// Point everything at the fixed inputs.
for (int &M : HalfMask)
if (M == IncomingInputs[0])
M = InputsFixed[0] + SourceOffset;
else if (M == IncomingInputs[1])
M = InputsFixed[1] + SourceOffset;
IncomingInputs[0] = InputsFixed[0] + SourceOffset;
IncomingInputs[1] = InputsFixed[1] + SourceOffset;
}
} else {
llvm_unreachable("Unhandled input size!");
}
// Now hoist the DWord down to the right half.
int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
for (int &M : HalfMask)
for (int Input : IncomingInputs)
if (M == Input)
M = FreeDWord * 2 + Input % 2;
};
moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
/*SourceOffset*/ 4, /*DestOffset*/ 0);
moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
/*SourceOffset*/ 0, /*DestOffset*/ 4);
// Now enact all the shuffles we've computed to move the inputs into their
// target half.
if (!isNoopShuffleMask(PSHUFLMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
V = DAG.getBitcast(
VT,
DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
// At this point, each half should contain all its inputs, and we can then
// just shuffle them into their final position.
assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 &&
"Failed to lift all the high half inputs to the low mask!");
assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
"Failed to lift all the low half inputs to the high mask!");
// Do a half shuffle for the low mask.
if (!isNoopShuffleMask(LoMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
// Do a half shuffle with the high mask after shifting its values down.
for (int &M : HiMask)
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
return V;
}
/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
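///
/// For example (illustrative): with VT = v8i16, Size = 8 and Scale = 2, a
/// mask entry Mask[2] = 9 (element 1 of V2) expands to byte indices 2 and 3
/// at byte positions 4 and 5 of V2Mask, while the corresponding V1Mask bytes
/// get 0x80 (PSHUFB's zeroing index). OR-ing the two PSHUFB results then
/// performs the blend.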
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
bool &V2InUse) {
SDValue V1Mask[16];
SDValue V2Mask[16];
V1InUse = false;
V2InUse = false;
int Size = Mask.size();
int Scale = 16 / Size;
for (int i = 0; i < 16; ++i) {
if (Mask[i / Scale] < 0) {
V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
const int ZeroMask = 0x80;
int V1Idx = Mask[i / Scale] < Size ? Mask[i / Scale] * Scale + i % Scale
: ZeroMask;
int V2Idx = Mask[i / Scale] < Size
? ZeroMask
: (Mask[i / Scale] - Size) * Scale + i % Scale;
if (Zeroable[i / Scale])
V1Idx = V2Idx = ZeroMask;
V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8);
V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8);
V1InUse |= (ZeroMask != V1Idx);
V2InUse |= (ZeroMask != V2Idx);
}
}
if (V1InUse)
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, V1),
DAG.getBuildVector(MVT::v16i8, DL, V1Mask));
if (V2InUse)
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
DAG.getBitcast(MVT::v16i8, V2),
DAG.getBuildVector(MVT::v16i8, DL, V2Mask));
// If we need shuffled inputs from both, blend the two.
SDValue V;
if (V1InUse && V2InUse)
V = DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
else
V = V1InUse ? V1 : V2;
// Cast the result back to the correct type.
return DAG.getBitcast(VT, V);
}
/// \brief Generic lowering of 8-lane i16 shuffles.
///
/// This handles both single-input shuffles and combined shuffle/blends with
/// two inputs. The single input shuffles are immediately delegated to
/// a dedicated lowering routine.
///
/// The blends are lowered in one of three fundamental ways. If there are few
/// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
/// of the input is significantly cheaper when lowered as an interleaving of
/// the two inputs, try to interleave them. Otherwise, blend the low and high
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
if (NumV2Inputs == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
Mask, Subtarget, DAG))
return Rotate;
// Make a copy of the mask so it can be modified.
SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end());
return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1,
MutableMask, Subtarget,
DAG);
}
assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
"All single-input shuffles should be canonicalized to be V1-input "
"shuffles.");
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
if (SDValue BitBlend =
lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
V2, Mask, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
Zeroable, DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
Mask, DAG);
}
/// \brief Check whether a compaction lowering can be done by dropping even
/// elements and compute how many times even elements must be dropped.
///
/// This handles shuffles which take every Nth element where N is a power of
/// two. Example shuffle masks:
///
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
/// N = 2: 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28
/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
///
/// Any of these lanes can of course be undef.
///
/// This routine only supports N <= 3.
/// FIXME: Evaluate whether either AVX or AVX-512 has any opportunities here
/// for larger N.
///
/// \returns N above, or the number of times even elements must be dropped if
/// there is such a number. Otherwise returns zero.
static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
bool IsSingleInput) {
// The modulus for the shuffle vector entries is based on whether this is
// a single input or not.
int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
"We should only be called with masks with a power-of-2 size!");
uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
// We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
// and 2^3 simultaneously. This is because we may have ambiguity with
// partially undef inputs.
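// For example (illustrative): for a single-input v16i8 shuffle, ModMask is
// 15; the mask <0,2,4,...,14,0,2,...,14> satisfies Mask[i] == (i << 1) & 15
// in every defined lane, so N = 1 stays viable, while <0,4,8,12,0,4,8,12,...>
// instead matches (i << 2) & 15 and yields N = 2.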
bool ViableForN[3] = {true, true, true};
for (int i = 0, e = Mask.size(); i < e; ++i) {
// Ignore undef lanes, we'll optimistically collapse them to the pattern we
// want.
if (Mask[i] < 0)
continue;
bool IsAnyViable = false;
for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
if (ViableForN[j]) {
uint64_t N = j + 1;
// The shuffle mask must be equal to (i * 2^N) % M.
if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
IsAnyViable = true;
else
ViableForN[j] = false;
}
// Early exit if we exhaust the possible powers of two.
if (!IsAnyViable)
break;
}
for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
if (ViableForN[j])
return j + 1;
// Return 0 as there is no viable power of two.
return 0;
}
/// \brief Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
/// detect any complexity-reducing interleaving. If that doesn't help, it uses
/// UNPCK to spread the i8 elements across two i16-element vectors, and uses
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to use a zext lowering.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
// For single-input shuffles, there are some nicer lowering tricks we can use.
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
// Notably, this handles splat and partial-splat shuffles more efficiently.
// However, it only makes sense if the pre-duplication shuffle simplifies
// things significantly. Currently, this means we need to be able to
// express the pre-duplication shuffle as an i16 shuffle.
//
// FIXME: We should check for other patterns which can be widened into an
// i16 shuffle as well.
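// For example (illustrative): <0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7> duplicates
// every byte, so the pre-duplication shuffle is the i16 identity; a single
// UNPCKL of V1 with itself then produces the widened result directly.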
auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
for (int i = 0; i < 16; i += 2)
if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
return false;
return true;
};
auto tryToWidenViaDuplication = [&]() -> SDValue {
if (!canWidenViaDuplication(Mask))
return SDValue();
SmallVector<int, 4> LoInputs;
copy_if(Mask, std::back_inserter(LoInputs),
[](int M) { return M >= 0 && M < 8; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
LoInputs.end());
SmallVector<int, 4> HiInputs;
copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
HiInputs.end());
bool TargetLo = LoInputs.size() >= HiInputs.size();
ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
SmallDenseMap<int, int, 8> LaneMap;
for (int I : InPlaceInputs) {
PreDupI16Shuffle[I/2] = I/2;
LaneMap[I] = I;
}
int j = TargetLo ? 0 : 4, je = j + 4;
for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
// Check if j is already a shuffle of this input. This happens when
// there are two adjacent bytes after we move the low one.
if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
// If we haven't yet mapped the input, search for a slot into which
// we can map it.
while (j < je && PreDupI16Shuffle[j] >= 0)
++j;
if (j == je)
// We can't place the inputs into a single half with a simple i16
// shuffle, so bail.
return SDValue();
// Map this input with the i16 shuffle.
PreDupI16Shuffle[j] = MovingInputs[i] / 2;
}
// Update the lane map based on the mapping we ended up with.
LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
}
V1 = DAG.getBitcast(
MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the bytes to form the i16s that will be shuffled into place.
V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
MVT::v16i8, V1, V1);
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0) {
int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
if (PostDupI16Shuffle[i / 2] < 0)
PostDupI16Shuffle[i / 2] = MappedMask;
else
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
"Conflicting entries in the original shuffle!");
}
return DAG.getBitcast(
MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
};
if (SDValue V = tryToWidenViaDuplication())
return V;
}
if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
// blends but after all of the single-input lowerings. If the single input
// lowerings can find an instruction sequence that is faster than a PSHUFB, we
// want to preserve that and we can DAG combine any longer sequences into
// a PSHUFB in the end. But once we start blending from multiple inputs,
// the complexity of DAG combining bad patterns back into PSHUFB is too high,
// and there are *very* few patterns that would actually be faster than the
// PSHUFB approach because of its ability to zero lanes.
//
// FIXME: The only exceptions to the above are blends which are exact
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
if (Subtarget.hasSSSE3()) {
bool V1InUse = false;
bool V2InUse = false;
SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
// If both V1 and V2 are in use and we can use a direct blend or an unpack,
// do so. This avoids using them to handle blends-with-zero which is
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;
// We can use an unpack to do the blending rather than an or in some
// cases. Even though the or may be (very marginally) more efficient, we
// prefer this lowering because there are common cases where part of
// the complexity of the shuffles goes away when we do the final blend as
// an unpack.
// FIXME: It might be worth trying to detect if the unpack-feeding
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
DL, MVT::v16i8, V1, V2, Mask, DAG))
return Unpack;
}
return PSHUFB;
}
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (SDValue BitBlend =
lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
return BitBlend;
// Check whether a compaction lowering can be done. This handles shuffles
// which take every Nth element for some even N. See the helper function for
// details.
//
// We special case these as they can be particularly efficiently handled with
// the PACKUSWB instruction on x86 and they show up in common patterns of
// rearranging bytes to truncate wide elements.
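// For example (illustrative): the two-input mask <0,2,4,...,30> has
// NumEvenDrops == 1; masking each input to 0x00FF per word and issuing a
// single PACKUSWB keeps bytes 0,2,...,14 of V1 followed by those of V2.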
bool IsSingleInput = V2.isUndef();
if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput)) {
// NumEvenDrops is log2 of the stride between the elements we keep. Another
// way of thinking about it is that we need to drop the even elements this
// many times to get the original input.
// First we need to zero all the dropped bytes.
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
// We use the mask type to pick which bytes are preserved based on how many
// elements are dropped.
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
SDValue ByteClearMask = DAG.getBitcast(
MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
// Now pack things back together.
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = IsSingleInput ? V1 : DAG.getBitcast(MVT::v8i16, V2);
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
for (int i = 1; i < NumEvenDrops; ++i) {
Result = DAG.getBitcast(MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
}
return Result;
}
// Handle multi-input cases by blending single-input shuffles.
if (NumV2Elements > 0)
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
Mask, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
// with a pack.
SDValue V = V1;
std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0)
(i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
SDValue VLoHalf, VHiHalf;
// Check if any of the odd lanes in the v16i8 are used. If not, we can mask
// them out and avoid using UNPCK{L,H} to extract the elements of V as
// i16s.
if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
// Use a mask to drop the high bytes.
VLoHalf = DAG.getBitcast(MVT::v8i16, V);
VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
DAG.getConstant(0x00FF, DL, MVT::v8i16));
// This will be a single vector shuffle instead of a blend so nuke VHiHalf.
VHiHalf = DAG.getUNDEF(MVT::v8i16);
// Squash the masks to point directly into VLoHalf.
for (int &M : LoBlendMask)
if (M >= 0)
M /= 2;
for (int &M : HiBlendMask)
if (M >= 0)
M /= 2;
} else {
// Otherwise just unpack the low half of V into VLoHalf and the high half into
// VHiHalf so that we can blend them as i16s.
SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
VLoHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
VHiHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
/// \brief Dispatching routine to lower various 128-bit x86 vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
case MVT::v2i64:
return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Unimplemented!");
}
}
/// \brief Generic routine to split vector shuffle into half-sized shuffles.
///
/// This routine just extracts two subvectors, shuffles them independently, and
/// then concatenates them back together. This should work effectively with all
/// AVX vector shuffle types.
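///
/// For example (illustrative): a v8i32 mask <0,8,1,9,4,12,5,13> splits into
/// a low-half shuffle <0,4,1,5> of (LoV1, LoV2) and a high-half shuffle
/// <0,4,1,5> of (HiV1, HiV2), which are then concatenated.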
static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.getSizeInBits() >= 256 &&
"Only for 256-bit or wider vector shuffles!");
assert(V1.getSimpleValueType() == VT && "Bad operand type!");
assert(V2.getSimpleValueType() == VT && "Bad operand type!");
ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);
int NumElements = VT.getVectorNumElements();
int SplitNumElements = NumElements / 2;
MVT ScalarVT = VT.getVectorElementType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
// Rather than splitting build-vectors, just build two narrower build
// vectors. This helps shuffling with splats and zeros.
auto SplitVector = [&](SDValue V) {
V = peekThroughBitcasts(V);
MVT OrigVT = V.getSimpleValueType();
int OrigNumElements = OrigVT.getVectorNumElements();
int OrigSplitNumElements = OrigNumElements / 2;
MVT OrigScalarVT = OrigVT.getVectorElementType();
MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
SDValue LoV, HiV;
auto *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV) {
LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
DAG.getIntPtrConstant(0, DL));
HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
DAG.getIntPtrConstant(OrigSplitNumElements, DL));
} else {
SmallVector<SDValue, 16> LoOps, HiOps;
for (int i = 0; i < OrigSplitNumElements; ++i) {
LoOps.push_back(BV->getOperand(i));
HiOps.push_back(BV->getOperand(i + OrigSplitNumElements));
}
LoV = DAG.getBuildVector(OrigSplitVT, DL, LoOps);
HiV = DAG.getBuildVector(OrigSplitVT, DL, HiOps);
}
return std::make_pair(DAG.getBitcast(SplitVT, LoV),
DAG.getBitcast(SplitVT, HiV));
};
SDValue LoV1, HiV1, LoV2, HiV2;
std::tie(LoV1, HiV1) = SplitVector(V1);
std::tie(LoV2, HiV2) = SplitVector(V2);
// Now create two 4-way blends of these half-width vectors.
auto HalfBlend = [&](ArrayRef<int> HalfMask) {
bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
for (int i = 0; i < SplitNumElements; ++i) {
int M = HalfMask[i];
if (M >= NumElements) {
if (M >= NumElements + SplitNumElements)
UseHiV2 = true;
else
UseLoV2 = true;
V2BlendMask[i] = M - NumElements;
BlendMask[i] = SplitNumElements + i;
} else if (M >= 0) {
if (M >= SplitNumElements)
UseHiV1 = true;
else
UseLoV1 = true;
V1BlendMask[i] = M;
BlendMask[i] = i;
}
}
// Because the lowering happens after all combining takes place, we need to
// manually combine these blend masks as much as possible so that we create
// a minimal number of high-level vector shuffle nodes.
// First try just blending the halves of V1 or V2.
if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
return DAG.getUNDEF(SplitVT);
if (!UseLoV2 && !UseHiV2)
return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
if (!UseLoV1 && !UseHiV1)
return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
SDValue V1Blend, V2Blend;
if (UseLoV1 && UseHiV1) {
V1Blend =
DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
} else {
// We only use half of V1 so map the usage down into the final blend mask.
V1Blend = UseLoV1 ? LoV1 : HiV1;
for (int i = 0; i < SplitNumElements; ++i)
if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
}
if (UseLoV2 && UseHiV2) {
V2Blend =
DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
} else {
// We only use half of V2 so map the usage down into the final blend mask.
V2Blend = UseLoV2 ? LoV2 : HiV2;
for (int i = 0; i < SplitNumElements; ++i)
if (BlendMask[i] >= SplitNumElements)
BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
}
return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
};
SDValue Lo = HalfBlend(LoMask);
SDValue Hi = HalfBlend(HiMask);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
/// \brief Either split a vector in halves or decompose the shuffles and the
/// blend.
///
/// This is provided as a good fallback for many lowerings of non-single-input
/// shuffles with more than one 128-bit lane. In those cases, we want to select
/// between splitting the shuffle into 128-bit components and stitching those
/// back together vs. extracting the single-input shuffles and blending those
/// results.
static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
int Size = Mask.size();
// If this can be modeled as a broadcast of two elements followed by a blend,
// prefer that lowering. This is especially important because broadcasts can
// often fold with memory operands.
auto DoBothBroadcast = [&] {
int V1BroadcastIdx = -1, V2BroadcastIdx = -1;
for (int M : Mask)
if (M >= Size) {
if (V2BroadcastIdx < 0)
V2BroadcastIdx = M - Size;
else if (M - Size != V2BroadcastIdx)
return false;
} else if (M >= 0) {
if (V1BroadcastIdx < 0)
V1BroadcastIdx = M;
else if (M != V1BroadcastIdx)
return false;
}
return true;
};
if (DoBothBroadcast())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
// unusually few instructions.
int LaneCount = VT.getSizeInBits() / 128;
int LaneSize = Size / LaneCount;
SmallBitVector LaneInputs[2];
LaneInputs[0].resize(LaneCount, false);
LaneInputs[1].resize(LaneCount, false);
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
// that the decomposed single-input shuffles don't end up here.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
}
/// \brief Lower a vector shuffle crossing multiple 128-bit lanes as
/// a permutation and blend of those lanes.
///
/// This essentially blends the out-of-lane inputs to each lane into the lane
/// from a permuted copy of the vector. This lowering strategy results in four
/// instructions in the worst case for a single-input cross-lane shuffle, fewer
/// than any other fully general cross-lane shuffle strategy I'm aware of.
/// Special cases for each particular shuffle pattern should be handled
/// prior to trying this lowering.
static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int Size = Mask.size();
int LaneSize = Size / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
// less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
bool LaneCrossing[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
if (!LaneCrossing[0] || !LaneCrossing[1])
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
assert(V2.isUndef() &&
"This last part of this routine only works on single input shuffles");
SmallVector<int, 32> FlippedBlendMask(Size);
for (int i = 0; i < Size; ++i)
FlippedBlendMask[i] =
Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize)
? Mask[i]
: Mask[i] % LaneSize +
(i / LaneSize) * LaneSize + Size);
// Flip the vector, and blend the results which should now be in-lane. The
// VPERM2X128 mask uses the low 2 bits for the low source and bits 4 and
// 5 for the high source. The value 3 selects the high half of source 2 and
// the value 2 selects the low half of source 2. We only use source 2 to
// allow folding it into a memory operand.
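// For example (illustrative): for a v8f32 mask <4,5,2,3,0,1,6,7>, Flipped
// becomes [V1.hi, V1.lo] and FlippedBlendMask is <8,9,2,3,12,13,6,7>, so the
// final in-lane blend reads the crossed elements from Flipped.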
unsigned PERMMask = 3 | 2 << 4;
SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT),
V1, DAG.getConstant(PERMMask, DL, MVT::i8));
return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
}
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode());
// If either input operand is a zero vector, use VPERM2X128 because its mask
// allows us to replace the zero input with an implicit zero.
if (!IsV1Zero && !IsV2Zero) {
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
// With AVX2, use VPERMQ/VPERMPD to allow memory folding.
if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
// With AVX1, use vperm2f128 (below) to allow load folding. Otherwise,
// this will likely become vinsertf128 which can't fold a 256-bit memop.
if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
}
}
// Otherwise form a 128-bit permutation. After accounting for undefs,
// convert the 64-bit shuffle mask selection values into 128-bit
// selection bits by dividing the indexes by 2 and shifting into positions
// defined by a vperm2*128 instruction's immediate control byte.
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
// [2] - ignore
// [3] - zero low half of destination
// [5:4] - select 128 bits from sources for high half of destination
// [6] - ignore
// [7] - zero high half of destination
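// For example (illustrative): WidenedMask = {1, 2} selects the high half of
// V1 for the low destination half and the low half of V2 for the high half,
// giving PermMask = 1 | (2 << 4) = 0x21.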
int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
unsigned PermMask = MaskLO | (MaskHI << 4);
// If either input is a zero vector, replace it with an undef input.
// Shuffle mask values < 4 are selecting elements of V1.
// Shuffle mask values >= 4 are selecting elements of V2.
// Adjust each half of the permute mask by clearing the half that was
// selecting the zero vector and setting the zero mask bit.
if (IsV1Zero) {
V1 = DAG.getUNDEF(VT);
if (MaskLO < 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI < 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
if (IsV2Zero) {
V2 = DAG.getUNDEF(VT);
if (MaskLO >= 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI >= 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, DL, MVT::i8));
}
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
/// This will only succeed when fixing the 128-bit lanes yields a single-input
/// non-lane-crossing shuffle with a repeating shuffle mask in each 128-bit
/// lane. This handles many cases where we can quickly blend away
/// the lane crosses early and then use simpler shuffles within each lane.
///
/// FIXME: It might be worthwhile at some point to support this without
/// requiring the 128-bit lane-relative shuffles to be repeating, but currently
/// in x86 only floating point has interesting non-repeating shuffles, and even
/// those are still *marginally* more expensive.
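///
/// For example (illustrative): the v8i32 mask <5,4,7,6,9,8,11,10> selects
/// lane 1 of V1 and lane 0 of V2 with the repeating in-lane mask <1,0,3,2>,
/// so we first shuffle the operands as v4i64 with lane mask <2,3,4,5> and
/// then apply the non-crossing mask <1,0,3,2,5,4,7,6>.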
static SDValue lowerVectorShuffleByMerging128BitLanes(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
int NumLanes = Size / LaneSize;
assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");
// See if we can build a hypothetical 128-bit lane-fixing shuffle mask. Also
// check whether the in-128-bit lane shuffles share a repeating pattern.
SmallVector<int, 4> Lanes((unsigned)NumLanes, -1);
SmallVector<int, 4> InLaneMask((unsigned)LaneSize, -1);
for (int i = 0; i < Size; ++i) {
if (Mask[i] < 0)
continue;
int j = i / LaneSize;
if (Lanes[j] < 0) {
// First entry we've seen for this lane.
Lanes[j] = Mask[i] / LaneSize;
} else if (Lanes[j] != Mask[i] / LaneSize) {
// This doesn't match the lane selected previously!
return SDValue();
}
// Check that within each lane we have a consistent shuffle mask.
int k = i % LaneSize;
if (InLaneMask[k] < 0) {
InLaneMask[k] = Mask[i] % LaneSize;
} else if (InLaneMask[k] != Mask[i] % LaneSize) {
// This doesn't fit a repeating in-lane mask.
return SDValue();
}
}
// First shuffle the lanes into place.
MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
VT.getSizeInBits() / 64);
SmallVector<int, 8> LaneMask((unsigned)NumLanes * 2, -1);
for (int i = 0; i < NumLanes; ++i)
if (Lanes[i] >= 0) {
LaneMask[2 * i + 0] = 2*Lanes[i] + 0;
LaneMask[2 * i + 1] = 2*Lanes[i] + 1;
}
V1 = DAG.getBitcast(LaneVT, V1);
V2 = DAG.getBitcast(LaneVT, V2);
SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);
// Cast it back to the type we actually want.
LaneShuffle = DAG.getBitcast(VT, LaneShuffle);
// Now do a simple shuffle that isn't lane crossing.
SmallVector<int, 8> NewMask((unsigned)Size, -1);
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
"Must not introduce lane crosses at this point!");
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
/// This allows for fast cases such as subvector extraction/insertion
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
SDValue V1, SDValue V2,
ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(VT.is256BitVector() && "Expected 256-bit vector");
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
if (!UndefLower && !UndefUpper)
return SDValue();
// Upper half is undef and lower half is whole upper subvector.
// e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
if (UndefUpper &&
isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getIntPtrConstant(HalfNumElts, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
DAG.getIntPtrConstant(0, DL));
}
// Lower half is undef and upper half is whole lower subvector.
// e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
if (UndefLower &&
isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
DAG.getIntPtrConstant(HalfNumElts, DL));
}
// If the shuffle only uses two of the four halves of the input operands,
// then extract them and perform the 'half' shuffle at half width.
// e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
int HalfIdx1 = -1, HalfIdx2 = -1;
SmallVector<int, 8> HalfMask(HalfNumElts);
unsigned Offset = UndefLower ? HalfNumElts : 0;
for (unsigned i = 0; i != HalfNumElts; ++i) {
int M = Mask[i + Offset];
if (M < 0) {
HalfMask[i] = M;
continue;
}
// Determine which of the 4 half vectors this element is from.
// i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
int HalfIdx = M / HalfNumElts;
// Determine the element index into its half vector source.
int HalfElt = M % HalfNumElts;
// We can shuffle with up to 2 half vectors; set the new 'half'
// shuffle mask accordingly.
if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
HalfMask[i] = HalfElt;
HalfIdx1 = HalfIdx;
continue;
}
if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
HalfMask[i] = HalfElt + HalfNumElts;
HalfIdx2 = HalfIdx;
continue;
}
// Too many half vectors referenced.
return SDValue();
}
assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
// Only shuffle the halves of the inputs when useful.
int NumLowerHalves =
(HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
int NumUpperHalves =
(HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
// uuuuXXXX - don't extract uppers just to insert again.
if (UndefLower && NumUpperHalves != 0)
return SDValue();
// XXXXuuuu - don't extract both uppers, instead shuffle and then extract.
if (UndefUpper && NumUpperHalves == 2)
return SDValue();
// AVX2 - XXXXuuuu - always extract lowers.
if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
// AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
if (VT == MVT::v4f64 || VT == MVT::v4i64)
return SDValue();
// AVX2 supports variable 32-bit element cross-lane shuffles.
if (VT == MVT::v8f32 || VT == MVT::v8i32) {
// XXXXuuuu - don't extract lowers and uppers.
if (UndefUpper && NumLowerHalves != 0 && NumUpperHalves != 0)
return SDValue();
}
}
auto GetHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
SDValue V = (HalfIdx < 2 ? V1 : V2);
HalfIdx = (HalfIdx % 2) * HalfNumElts;
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
DAG.getIntPtrConstant(HalfIdx, DL));
};
SDValue Half1 = GetHalfVector(HalfIdx1);
SDValue Half2 = GetHalfVector(HalfIdx2);
SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
DAG.getIntPtrConstant(Offset, DL));
}
/// \brief Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
/// This returns true if the elements from a particular input are already in the
/// slot required by the given mask and require no permutation.
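///
/// For example (illustrative): for the v4 mask <0, 5, 2, 7>, input 0
/// (elements 0 and 2) and input 1 (elements 5 and 7) are both in place.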
static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
assert((Input == 0 || Input == 1) && "Only two inputs to shuffles.");
int Size = Mask.size();
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i)
return false;
return true;
}
/// Handle the case where the shuffle sources are coming from the same 128-bit lane and
/// every lane can be represented as the same repeating mask - allowing us to
/// shuffle the sources with the repeating shuffle and then permute the result
/// to the destination lanes.
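///
/// For example (illustrative, assuming AVX2): the single-input v8f32 mask
/// <6,7,6,7,2,3,2,3> becomes the in-lane repeating shuffle
/// <2,3,u,u,6,7,u,u> followed by the 64-bit sub-lane permute
/// <4,5,4,5,0,1,0,1> (a VPERMPD-style pattern).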
static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits() / 128;
int NumLaneElts = NumElts / NumLanes;
// On AVX2 we may be able to just shuffle the lowest elements and then
// broadcast the result.
if (Subtarget.hasAVX2()) {
for (unsigned BroadcastSize : {16, 32, 64}) {
if (BroadcastSize <= VT.getScalarSizeInBits())
continue;
int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits();
// Attempt to match a repeating pattern every NumBroadcastElts,
// accounting for UNDEFs while only referencing the lowest 128-bit
// lane of the inputs.
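// For example (illustrative): the v8i32 mask <1,0,1,0,1,0,1,0> with a
// 64-bit BroadcastSize first swaps the low two elements in place and then
// broadcasts the low 64 bits to each pair of elements.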
auto FindRepeatingBroadcastMask = [&](SmallVectorImpl<int> &RepeatMask) {
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j) {
int M = Mask[i + j];
if (M < 0)
continue;
int &R = RepeatMask[j];
if (0 != ((M % NumElts) / NumLaneElts))
return false;
if (0 <= R && R != M)
return false;
R = M;
}
return true;
};
SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1);
if (!FindRepeatingBroadcastMask(RepeatMask))
continue;
// Shuffle the (lowest) repeated elements in place for broadcast.
SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask);
// Shuffle the actual broadcast.
SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1);
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j)
BroadcastMask[i + j] = j;
return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
BroadcastMask);
}
}
// Bail if the shuffle mask doesn't cross 128-bit lanes.
if (!is128BitLaneCrossingShuffleMask(VT, Mask))
return SDValue();
// Bail if we already have a repeated lane shuffle mask.
SmallVector<int, 8> RepeatedShuffleMask;
if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask))
return SDValue();
// On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
// (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes.
int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1;
int NumSubLanes = NumLanes * SubLaneScale;
int NumSubLaneElts = NumLaneElts / SubLaneScale;
// Check that all the sources are coming from the same lane and see if we can
// form a repeating shuffle mask (local to each sub-lane). At the same time,
// determine the source sub-lane for each destination sub-lane.
int TopSrcSubLane = -1;
SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
SmallVector<int, 8> RepeatedSubLaneMasks[2] = {
SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef),
SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef)};
for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
// Extract the sub-lane mask, check that it all comes from the same lane
// and normalize the mask entries to come from the first lane.
int SrcLane = -1;
SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
if (M < 0)
continue;
int Lane = (M % NumElts) / NumLaneElts;
if ((0 <= SrcLane) && (SrcLane != Lane))
return SDValue();
SrcLane = Lane;
int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
SubLaneMask[Elt] = LocalM;
}
// Whole sub-lane is UNDEF.
if (SrcLane < 0)
continue;
// Attempt to match against the candidate repeated sub-lane masks.
for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
for (int i = 0; i != NumSubLaneElts; ++i) {
if (M1[i] < 0 || M2[i] < 0)
continue;
if (M1[i] != M2[i])
return false;
}
return true;
};
auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
continue;
// Merge the sub-lane mask into the matching repeated sub-lane mask.
for (int i = 0; i != NumSubLaneElts; ++i) {
int M = SubLaneMask[i];
if (M < 0)
continue;
assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
"Unexpected mask element");
RepeatedSubLaneMask[i] = M;
}
// Track the topmost source sub-lane - by setting the remaining to UNDEF
// we can greatly simplify shuffle matching.
int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
break;
}
// Bail if we failed to find a matching repeated sub-lane mask.
if (Dst2SrcSubLanes[DstSubLane] < 0)
return SDValue();
}
assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
"Unexpected source lane");
// Create a repeating shuffle mask for the entire vector.
SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
int Lane = SubLane / SubLaneScale;
auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
int M = RepeatedSubLaneMask[Elt];
if (M < 0)
continue;
int Idx = (SubLane * NumSubLaneElts) + Elt;
RepeatedMask[Idx] = M + (Lane * NumLaneElts);
}
}
SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);
// Shuffle each source sub-lane to its destination.
SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
for (int i = 0; i != NumElts; i += NumSubLaneElts) {
int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
if (SrcSubLane < 0)
continue;
for (int j = 0; j != NumSubLaneElts; ++j)
SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
}
return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
SubLaneMask);
}
static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
unsigned &ShuffleImm,
ArrayRef<int> Mask) {
int NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected data type for VSHUFPD");
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
// Mask for V4F64: 0/1, 4/5, 2/3, 6/7, ..
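// For example (illustrative): the v4f64 mask <1, 5, 2, 6> picks
// {V1[1], V2[1], V1[2], V2[2]} and encodes as ShuffleImm = 0b0011.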
ShuffleImm = 0;
bool ShufpdMask = true;
bool CommutableMask = true;
for (int i = 0; i < NumElts; ++i) {
if (Mask[i] == SM_SentinelUndef)
continue;
if (Mask[i] < 0)
return false;
int Val = (i & 6) + NumElts * (i & 1);
int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
if (Mask[i] < Val || Mask[i] > Val + 1)
ShufpdMask = false;
if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
CommutableMask = false;
ShuffleImm |= (Mask[i] % 2) << i;
}
if (ShufpdMask)
return true;
if (CommutableMask) {
std::swap(V1, V2);
return true;
}
return false;
}
static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
"Unexpected data type for VSHUFPD");
unsigned Immediate = 0;
if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
return SDValue();
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
DAG.getConstant(Immediate, DL, MVT::i8));
}
static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
}
/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
// Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
}
// With AVX2 we have direct support for this permutation.
if (Subtarget.hasAVX2())
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
DAG);
}
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op =
lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
return Op;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either input is already in place,
// we will be able to shuffle the other input across lanes in a single
// instruction, so skip this pattern.
if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
isShuffleMaskInputInPlace(1, Mask))))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
// If we have AVX2 then we always want to lower with a blend because at v4 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
Mask, DAG);
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 4-lane 64-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling.
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower-latency instructions that will operate on both lanes.
SmallVector<int, 2> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
SmallVector<int, 4> PSHUFDMask;
scaleShuffleMask(2, RepeatedMask, PSHUFDMask);
return DAG.getBitcast(
MVT::v4i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
DAG.getBitcast(MVT::v8i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
// AVX2 provides a direct instruction for permuting a single input across
// lanes.
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
}
// Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
if (!isShuffleMaskInputInPlace(0, Mask) &&
!isShuffleMaskInputInPlace(1, Mask))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
Mask, DAG);
}
/// \brief Handle lowering of 8-lane 32-bit floating point shuffles.
///
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 &&
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
return V;
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
}
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return V;
// If we have a single-input shuffle with different shuffle patterns in the
// two 128-bit lanes, use the variable-mask form of VPERMILPS.
if (V2.isUndef()) {
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
if (Subtarget.hasAVX2())
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
DAG);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Result;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
// For non-AVX512, if the mask is equivalent to an in-lane 16-bit unpack
// then try to split, since the split form uses vpunpcklwd and vpunpckhwd
// and is more efficient than vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
Mask, DAG))
return V;
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
Mask, DAG);
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 8-lane 32-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling.
static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// For non-AVX512, if the mask is equivalent to an in-lane 16-bit unpack
// then try to split, since the split form uses vpunpcklwd and vpunpckhwd
// and is more efficient than vblend.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
if (SDValue V =
lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the two 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
bool Is128BitLaneRepeatedShuffle =
is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask);
if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
}
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return V;
// If the shuffle patterns aren't repeated but it is a single input, directly
// generate a cross-lane VPERMD instruction.
if (V2.isUndef()) {
SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
}
// Assume that a single SHUFPS is faster than an alternative sequence of
// multiple instructions (even if the CPU has a domain penalty).
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v8i32, ShufPS);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
Mask, DAG);
}
/// \brief Handle lowering of 16-lane 16-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling.
static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// There are no generalized cross-lane shuffle operations available on i16
// element types.
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG);
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v16 case.
return lowerV8I16GeneralSingleInputVectorShuffle(
DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
}
}
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// AVX512BWVL can lower to VPERMW.
if (Subtarget.hasBWI() && Subtarget.hasVLX())
return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
}
/// \brief Handle lowering of 32-lane 8-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling.
static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2,
Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return V;
// There are no generalized cross-lane shuffle operations available on i8
// element types.
if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG);
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic lowering.
return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
}
/// \brief High-level routine to lower various 256-bit x86 vector shuffles.
///
/// This routine either breaks down the specific type of a 256-bit x86 vector
/// shuffle or splits it into two 128-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = VT.getVectorNumElements();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// There is a really nice hard cut-over between AVX1 and AVX2 that means we
// can check for those subtargets here and avoid much of the subtarget
// querying in the per-vector-type lowering routines. With AVX1 we have
// essentially *zero* ability to manipulate a 256-bit vector with integer
// types. Since we'll use floating point types there eventually, just
// immediately cast everything to a float and operate entirely in that domain.
if (VT.isInteger() && !Subtarget.hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();
if (ElementBits < 32) {
// No floating point type is available; if we can't use the bit operations
// for masking/blending then decompose into 128-bit vectors.
if (SDValue V =
lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
return V;
if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
VT.getVectorNumElements());
V1 = DAG.getBitcast(FpVT, V1);
V2 = DAG.getBitcast(FpVT, V2);
return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
switch (VT.SimpleTy) {
case MVT::v4f64:
return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i8:
return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 256-bit x86 vector type!");
}
}
/// \brief Try to lower a vector shuffle as a shuffle of 128-bit subvectors.
static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
assert(VT.getScalarSizeInBits() == 64 &&
"Unexpected element type size for 128bit shuffle.");
// Handling a 256-bit vector requires VLX, and the function
// lowerV2X128VectorShuffle() is most probably a better solution for that case.
assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 0, 1, 2, 3});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 8, 9, 10, 11})) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
assert(WidenedMask.size() == 4);
// See if this is an insertion of the lower 128 bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
// Make sure all V1 subvectors are in place.
if (WidenedMask[i] < 4) {
if (WidenedMask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and it's the lowest 128 bits.
if (V2Index >= 0 || WidenedMask[i] != 4) {
IsInsert = false;
break;
}
V2Index = i;
}
}
if (IsInsert && V2Index >= 0) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
DAG.getIntPtrConstant(0, DL));
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure all elements come from the same Op.
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
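// For example (illustrative): the widened mask <0, 2, 4, 6> keeps
// Ops = {V1, V2} and encodes PermMask = 0x88, selecting the even 128-bit
// chunks of each source.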
PermMask |= (WidenedMask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
DAG.getConstant(PermMask, DL, MVT::i8));
}
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (V2.isUndef()) {
// Use low duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6}))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
// Non-half-crossing single input shuffles can be lowered with an
// interleaved permutation.
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
}
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
}
if (SDValue Shuf128 =
lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Shuf128;
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Unpck;
// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op =
lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Op;
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,
V2, DAG, Subtarget))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
// Use even/odd duplicate instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, RepeatedMask, {0, 0, 2, 2}))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
if (isShuffleEquivalent(V1, V2, RepeatedMask, {1, 1, 3, 3}))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);
if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return Unpck;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Shuf128 =
lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Shuf128;
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower-latency instructions that will operate on all four
// 128-bit lanes.
SmallVector<int, 2> Repeated128Mask;
if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
SmallVector<int, 4> PSHUFDMask;
scaleShuffleMask(2, Repeated128Mask, PSHUFDMask);
return DAG.getBitcast(
MVT::v8i64,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
DAG.getBitcast(MVT::v16i32, V1),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
SmallVector<int, 4> Repeated256Mask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask))
return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1,
getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
// Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,
V2, DAG, Subtarget))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the four 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
bool Is128BitLaneRepeatedShuffle =
is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask);
if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
// Try to use byte rotation instructions.
if (Subtarget.hasBWI())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Assume that a single SHUFPS is faster than using a permv shuffle.
// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);
}
// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;
if (V2.isUndef()) {
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v32 case.
return lowerV8I16GeneralSingleInputVectorShuffle(
DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
}
}
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// VBMI can use VPERMV/VPERMV3 byte shuffles.
if (Subtarget.hasVBMI())
return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
/// \brief High-level routine to lower various 512-bit x86 vector shuffles.
///
/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/ basic ISA!");
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = Mask.size();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Check for being able to broadcast a single element.
if (SDValue Broadcast =
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
return Broadcast;
// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16f32:
return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i64:
return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i32:
return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i16:
return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v64i8:
return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 512-bit x86 vector type!");
}
}
// Lower vXi1 vector shuffles.
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
// vector, shuffle, and then truncate it back.
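// For example (illustrative): a v16i1 shuffle is performed by sign-extending
// both operands to v16i32, shuffling at that width, and then truncating (or
// converting back to a mask) to v16i1.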
static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
MVT ExtVT;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Expected a vector of i1 elements");
case MVT::v2i1:
ExtVT = MVT::v2i64;
break;
case MVT::v4i1:
ExtVT = MVT::v4i32;
break;
case MVT::v8i1:
ExtVT = MVT::v8i64; // Take a 512-bit type; more shuffles are available on KNL
break;
case MVT::v16i1:
ExtVT = MVT::v16i32;
break;
case MVT::v32i1:
ExtVT = MVT::v32i16;
break;
case MVT::v64i1:
ExtVT = MVT::v64i8;
break;
}
if (ISD::isBuildVectorAllZeros(V1.getNode()))
V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V1.getNode()))
V1 = getOnesVector(ExtVT, DAG, DL);
else
V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
if (V2.isUndef())
V2 = DAG.getUNDEF(ExtVT);
else if (ISD::isBuildVectorAllZeros(V2.getNode()))
V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V2.getNode()))
V2 = getOnesVector(ExtVT, DAG, DL);
else
V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask);
// Since the i1 elements were sign-extended we can use X86ISD::CVT2MASK.
int NumElems = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
(Subtarget.hasDQI() && (NumElems < 32)))
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, Shuffle);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
/// Helper function that returns true if the shuffle mask should be
/// commuted to improve canonicalization.
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();
int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
continue;
else if (M < NumElements)
++NumV1Elements;
else
++NumV2Elements;
// Commute the shuffle as needed such that more elements come from V1 than
// V2. This allows us to match the shuffle pattern strictly on how many
// elements come from V1 without handling the symmetric cases.
if (NumV2Elements > NumV1Elements)
return true;
assert(NumV1Elements > 0 && "No V1 indices");
if (NumV2Elements == 0)
return false;
// When the number of V1 and V2 elements are the same, try to minimize the
// number of uses of V2 in the low half of the vector. When that is tied,
// ensure that the sum of indices for V1 is equal to or lower than the sum of
// indices for V2. When those are equal, try to ensure that the number of odd
// indices for V1 is lower than the number of odd indices for V2.
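// For example, with Mask = <4, 5, 2, 3> on a 4-element shuffle, V1 and V2
// each contribute two elements, but both V2 elements land in the low half,
// so we commute to the equivalent Mask = <0, 1, 6, 7> with V1 and V2
// swapped.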
if (NumV1Elements == NumV2Elements) {
int LowV1Elements = 0, LowV2Elements = 0;
for (int M : Mask.slice(0, NumElements / 2))
if (M >= NumElements)
++LowV2Elements;
else if (M >= 0)
++LowV1Elements;
if (LowV2Elements > LowV1Elements)
return true;
if (LowV2Elements == LowV1Elements) {
int SumV1Indices = 0, SumV2Indices = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= NumElements)
SumV2Indices += i;
else if (Mask[i] >= 0)
SumV1Indices += i;
if (SumV2Indices < SumV1Indices)
return true;
if (SumV2Indices == SumV1Indices) {
int NumV1OddIndices = 0, NumV2OddIndices = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= NumElements)
NumV2OddIndices += i % 2;
else if (Mask[i] >= 0)
NumV1OddIndices += i % 2;
if (NumV2OddIndices < NumV1OddIndices)
return true;
}
}
}
return false;
}
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
/// above in helper routines. The canonicalization attempts to widen shuffles
/// to involve fewer lanes of wider elements, consolidate symmetric patterns
/// so that only one of the two inputs needs to be tested, etc.
static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
int NumElements = VT.getVectorNumElements();
SDLoc DL(Op);
bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);
assert((VT.getSizeInBits() != 64 || Is1BitVector) &&
"Can't lower MMX shuffles");
bool V1IsUndef = V1.isUndef();
bool V2IsUndef = V2.isUndef();
if (V1IsUndef && V2IsUndef)
return DAG.getUNDEF(VT);
// When we create a shuffle node we put the UNDEF node in the second operand,
// but in some cases the first operand may be transformed to UNDEF.
// In this case we should just commute the node.
if (V1IsUndef)
return DAG.getCommutedVectorShuffle(*SVOp);
// Check for non-undef masks pointing at an undef vector and make the masks
// undef as well. This makes it easier to match the shuffle based solely on
// the mask.
if (V2IsUndef)
for (int M : Mask)
if (M >= NumElements) {
SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
for (int &M : NewMask)
if (M >= NumElements)
M = -1;
return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
}
// Check for illegal shuffle mask element index values.
int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2);
(void)MaskUpperLimit;
assert(llvm::all_of(Mask,
[&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
"Out of bounds shuffle index");
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
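// For example, a v4i32 shuffle with Mask = <2, 3, 6, 7> moves aligned pairs
// of elements, so it can be widened to a v2i64 shuffle with Mask = <1, 3>,
// for which cheaper lowerings are available.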
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(Mask, WidenedMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
// Make sure that the new vector type is legal. For example, v2f64 isn't
// legal on SSE1.
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
V1 = DAG.getBitcast(NewVT, V1);
V2 = DAG.getBitcast(NewVT, V2);
return DAG.getBitcast(
VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask));
}
}
// Commute the shuffle if it will improve canonicalization.
if (canonicalizeShuffleMaskWithCommute(Mask))
return DAG.getCommutedVectorShuffle(*SVOp);
// For each vector width, delegate to a specialized lowering routine.
if (VT.is128BitVector())
return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
DAG);
if (VT.is256BitVector())
return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
DAG);
if (VT.is512BitVector())
return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
DAG);
if (Is1BitVector)
return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
llvm_unreachable("Unimplemented!");
}
/// \brief Try to lower a VSELECT instruction to a vector shuffle.
static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Cond = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
auto *CondBV = cast<BuildVectorSDNode>(Cond);
// Only non-legal VSELECTs reach this lowering; convert those into generic
// shuffles and re-use the shuffle lowering path for blends.
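// For example, a v4i32 vselect with Cond = <-1, 0, -1, 0> takes elements 0
// and 2 from LHS and elements 1 and 3 from RHS, i.e. the blend shuffle
// Mask = <0, 5, 2, 7>.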
SmallVector<int, 32> Mask;
for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
SDValue CondElt = CondBV->getOperand(i);
Mask.push_back(
isa<ConstantSDNode>(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0)
: -1);
}
return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
}
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
if (ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
return SDValue();
// If this VSELECT has a vector of i1 as a mask, it will be directly matched
// with patterns on the mask registers on AVX-512.
if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
return Op;
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
// Variable blends are only legal from SSE4.1 onward.
if (!Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
// into an i1 condition so that we can use the mask-based 512-bit blend
// instructions.
if (VT.getSizeInBits() == 512) {
SDValue Cond = Op.getOperand(0);
// The vNi1 condition case should be handled above as it can be trivially
// lowered.
assert(Cond.getValueType().getScalarSizeInBits() ==
VT.getScalarSizeInBits() &&
"Should have a size-matched integer condition!");
// Build a mask by testing the condition against itself (tests for zero).
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
// Now return a new VSELECT using the mask.
return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
}
// Only some types will be legal on some subtargets. If we can emit a legal
// VSELECT-matching blend, return Op, but if we need to expand, return
// a null value.
switch (VT.SimpleTy) {
default:
// Most of the vector types have blends past SSE4.1.
return Op;
case MVT::v32i8:
// The byte blends for AVX vectors were introduced only in AVX2.
if (Subtarget.hasAVX2())
return Op;
return SDValue();
case MVT::v8i16:
case MVT::v16i16:
// AVX-512 BWI and VLX features support VSELECT with i16 elements.
if (Subtarget.hasBWI() && Subtarget.hasVLX())
return Op;
// FIXME: We should custom lower this by fixing the condition and using i8
// blends.
return SDValue();
}
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
if (!Op.getOperand(0).getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32. And in
// the case of a store, it's not worth it if the index is a constant 0,
// because a MOVSSmr can be used instead, which is smaller and faster.
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE ||
isNullConstant(Op.getOperand(1))) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
Op.getOperand(1));
return DAG.getBitcast(MVT::f32, Extract);
}
if (VT == MVT::i32 || VT == MVT::i64) {
// EXTRACTPS/PEXTRQ work with a constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
return SDValue();
}
/// Extract one bit from a mask vector, such as v16i1 or v8i1.
/// AVX-512 feature.
SDValue
X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
MVT EltVT = Op.getSimpleValueType();
assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
"Unexpected vector type in ExtractBitFromMaskVector");
// A variable index can't be handled in mask registers,
// so extend the vector to VR512/VR128.
if (!isa<ConstantSDNode>(Idx)) {
unsigned NumElts = VecVT.getVectorNumElements();
// Extending v8i1/v16i1 to 512-bit gets better performance on KNL
// than extending to 128/256-bit.
unsigned VecSize = (NumElts <= 4 ? 128 : 512);
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
ExtVT.getVectorElementType(), Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
(VecVT.getVectorNumElements() < 8)) {
// Use kshiftlw/rw instruction.
VecVT = MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
DAG.getUNDEF(VecVT),
Vec,
DAG.getIntPtrConstant(0, dl));
}
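// For example, extracting bit 3 of a v16i1 mask: kshiftl by 15 - 3 = 12
// moves bit 3 into the top position and zeroes everything below it, then
// kshiftr by 15 moves it down to bit 0 with the rest of the register clear.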
unsigned MaxShift = VecVT.getVectorNumElements() - 1;
if (MaxShift - IdxVal)
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(MaxShift - IdxVal, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(MaxShift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
if (VecVT.getVectorElementType() == MVT::i1)
return ExtractBitFromMaskVector(Op, DAG);
if (!isa<ConstantSDNode>(Idx)) {
// It's more profitable to go through memory (1 cycle throughput)
// than to use a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput).
// The IACA tool was used to get these performance estimates
// (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)
//
// example : extractelement <16 x i8> %a, i32 %i
//
// Block Throughput: 3.00 Cycles
// Throughput Bottleneck: Port5
//
// | Num Of | Ports pressure in cycles | |
// | Uops | 0 - DV | 5 | 6 | 7 | |
// ---------------------------------------------
// | 1 | | 1.0 | | | CP | vmovd xmm1, edi
// | 1 | | 1.0 | | | CP | vpshufb xmm0, xmm0, xmm1
// | 2 | 1.0 | 1.0 | | | CP | vpextrb eax, xmm0, 0x0
// Total Num Of Uops: 4
//
//
// Block Throughput: 1.00 Cycles
// Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4
//
// | | Ports pressure in cycles | |
// |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
// ---------------------------------------------------------
// |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
// |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
// |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
// Total Num Of Uops: 4
return SDValue();
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
// Get the 128-bit vector.
Vec = extract128BitVector(Vec, IdxVal, DAG, dl);
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
// this can be done with a mask.
IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, dl, MVT::i32));
}
assert(VecVT.is128BitVector() && "Unexpected vector length");
MVT VT = Op.getSimpleValueType();
if (VT.getSizeInBits() == 16) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
// we're going to zero extend the register or fold the store (SSE41 only).
if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
!(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
// Transform it so it matches pextrw, which produces a 32-bit result.
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
if (Subtarget.hasSSE41())
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
// TODO: We only extract a single element from v16i8, so we can probably
// afford to be more aggressive here before using the default approach of
// spilling to stack.
if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
// Extract either the lowest i32 or any i16, and extract the sub-byte.
int DWordIdx = IdxVal / 4;
if (DWordIdx == 0) {
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec),
DAG.getIntPtrConstant(DWordIdx, dl));
int ShiftVal = (IdxVal % 4) * 8;
if (ShiftVal != 0)
Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
DAG.getConstant(ShiftVal, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
int WordIdx = IdxVal / 2;
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
DAG.getBitcast(MVT::v8i16, Vec),
DAG.getIntPtrConstant(WordIdx, dl));
int ShiftVal = (IdxVal % 2) * 8;
if (ShiftVal != 0)
Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
DAG.getConstant(ShiftVal, dl, MVT::i16));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
if (VT.getSizeInBits() == 32) {
if (IdxVal == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 };
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
if (IdxVal == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note: if the lower 64 bits of the result of the UNPCKHPD are then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
}
return SDValue();
}
/// Insert one bit into a mask vector, such as v16i1 or v8i1.
/// AVX-512 feature.
SDValue
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
MVT VecVT = Vec.getSimpleValueType();
if (!isa<ConstantSDNode>(Idx)) {
// Non-constant index: extend the source and destination,
// insert the element, and then truncate the result.
MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
unsigned NumElems = VecVT.getVectorNumElements();
if (Vec.isUndef()) {
if (IdxVal)
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return EltInVec;
}
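// The two special cases below avoid a full shuffle by combining kshift
// pairs with an OR: one pair isolates the new bit at its target position,
// the other pair clears the corresponding bit in the source vector.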
// Insertion of one bit into the first position.
if (IdxVal == 0) {
// Clean top bits of vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(NumElems - 1, dl, MVT::i8));
EltInVec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, EltInVec,
DAG.getConstant(NumElems - 1, dl, MVT::i8));
// Clean the first bit in source vector.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Insertion of one bit into the last position.
if (IdxVal == NumElems - 1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Clean the last bit in the source vector.
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Use a shuffle to insert the element.
SmallVector<int, 64> MaskVec(NumElems);
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = (i == IdxVal) ? NumElems : i;
return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
if (EltVT == MVT::i1)
return InsertBitToMaskVector(Op, DAG);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if (!isa<ConstantSDNode>(N2))
return SDValue();
auto *N2C = cast<ConstantSDNode>(N2);
unsigned IdxVal = N2C->getZExtValue();
bool IsZeroElt = X86::isZeroNode(N1);
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
// If we are inserting an element, see if we can do this more efficiently with
// a blend shuffle with a rematerializable vector than a costly integer
// insertion.
if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
16 <= EltVT.getSizeInBits()) {
SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
BlendMask.push_back(i == IdxVal ? i + NumElts : i);
SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
: DAG.getConstant(-1, dl, VT);
return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
// With a 256-bit vector, we can insert into the zero element efficiently
// using a blend if we have AVX or AVX2 and the right data type.
if (VT.is256BitVector() && IdxVal == 0) {
// TODO: It is worthwhile to cast integer to floating point and back
// and incur a domain crossing penalty if that's what we'll end up
// doing anyway after extracting to a 128-bit vector.
if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
(Subtarget.hasAVX2() && EltVT == MVT::i32)) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
N2 = DAG.getIntPtrConstant(1, dl);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
}
}
// Get the desired 128-bit vector chunk.
SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
assert(isPowerOf2_32(NumEltsIn128));
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, dl, MVT::i32));
// Insert the changed part back into the bigger vector
return insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument. SSE41 is required for pinsrb.
if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
unsigned Opc;
if (VT == MVT::v8i16) {
assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
Opc = X86ISD::PINSRW;
} else {
assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
Opc = X86ISD::PINSRB;
}
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
}
if (Subtarget.hasSSE41()) {
if (EltVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into
// these bits. For example (insert (extract, 3), 2) could be matched by
// putting the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
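// For example, inserting into element 2 with no zeroing uses the immediate
// (2 << 4) = 0x20: source select 0 in bits [7:6], destination select 2 in
// bits [5:4], and an empty zero mask in bits [3:0].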
bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
// than an insertps. Blends are simpler operations in hardware and so
// will always have equal or better performance than insertps.
// But if optimizing for size and there's a load folding opportunity,
// generate insertps because blendps does not have a 32-bit memory
// operand form.
N2 = DAG.getIntPtrConstant(1, dl);
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2);
}
N2 = DAG.getIntPtrConstant(IdxVal << 4, dl);
// Create this as a scalar-to-vector.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
}
// PINSR* works with a constant index.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return Op;
}
return SDValue();
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
// It's always cheaper to replace a xor+movd with xorps, and this simplifies
// further combines.
if (X86::isZeroNode(Op.getOperand(0)))
return getZeroVector(OpVT, Subtarget, DAG, dl);
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits() / 128;
MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
// Insert the 128-bit vector.
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
assert(OpVT.is128BitVector() && "Expected an SSE type!");
// Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
if (OpVT == MVT::v4i32)
return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
return DAG.getBitcast(
OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
// a simple subregister reference or explicit instructions to grab the
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX() && "EXTRACT_SUBVECTOR requires AVX");
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT ResVT = Op.getSimpleValueType();
// When v1i1 is legal, a scalarization of a vselect with a vXi1 Cond
// would result in: v1i1 = extract_subvector(vXi1, idx).
// Lower these into extract_vector_elt which is already selectable.
if (ResVT == MVT::v1i1) {
assert(Subtarget.hasAVX512() &&
"Boolean EXTRACT_SUBVECTOR requires AVX512");
MVT EltVT = ResVT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LegalVT =
(TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
}
assert((In.getSimpleValueType().is256BitVector() ||
In.getSimpleValueType().is512BitVector()) &&
"Can only extract from 256-bit or 512-bit vectors");
// If the input is a buildvector, just emit a smaller one.
unsigned ElemsPerChunk = ResVT.getVectorNumElements();
if (In.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(
ResVT, dl, makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk));
// Everything else is legal.
return Op;
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
// simple superregister reference or explicit instructions to insert
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
return insert1BitVector(Op, DAG, Subtarget);
}
// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
// References to absolute symbols are never PC-relative.
if (GV && GV->isAbsoluteSymbolRef())
return X86ISD::Wrapper;
CodeModel::Model M = getTargetMachine().getCodeModel();
if (Subtarget.isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
return X86ISD::WrapperRIP;
return X86ISD::Wrapper;
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetConstantPool(
CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
return Result;
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
SDLoc DL(Op);
Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isPositionIndependent() && !Subtarget.is64Bit()) {
Result =
DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
// For symbols that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddress node.
unsigned char OpFlags =
Subtarget.classifyBlockAddressReference();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
return Result;
}
SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
const SDLoc &dl, int64_t Offset,
SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags = Subtarget.classifyGlobalReference(GV);
CodeModel::Model M = DAG.getTarget().getCodeModel();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
Offset = 0;
} else {
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
}
Result = DAG.getNode(getGlobalWrapperKind(GV), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
DAG.getConstant(Offset, dl, PtrVT));
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG);
}
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
}
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
MFI.setAdjustsStack(true);
MFI.setHasCalls(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
SDValue InFlag;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
const EVT PtrVT,
bool is64Bit) {
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
if (is64Bit) {
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
// of Base.
// Build x@dtpoff.
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
bool is64Bit, bool isPIC) {
SDLoc dl(GA);
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
MachinePointerInfo(Ptr));
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
// initialexec.
unsigned WrapperKind = X86ISD::Wrapper;
if (model == TLSModel::LocalExec) {
OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
} else if (model == TLSModel::InitialExec) {
if (is64Bit) {
OperandFlags = X86II::MO_GOTTPOFF;
WrapperKind = X86ISD::WrapperRIP;
} else {
OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
}
} else {
llvm_unreachable("Unexpected model");
}
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
SDValue TGA =
DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
// The address of the thread-local variable is the sum of the thread
// pointer and the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
const GlobalValue *GV = GA->getGlobal();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool PositionIndependent = isPositionIndependent();
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget.is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
Subtarget.is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
if (Subtarget.isTargetDarwin()) {
// Darwin only has one model of TLS. Lower to that.
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = PositionIndependent && !Subtarget.is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
SDLoc DL(Op);
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
// Lowering the machine ISD will make sure everything is in the right
// location.
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true),
Chain.getValue(1), DL);
// TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
if (Subtarget.isTargetKnownWindowsMSVC() ||
Subtarget.isTargetWindowsItanium() ||
Subtarget.isTargetWindowsGNU()) {
// Just use the implicit TLS architecture.
// We need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
// ; from TEB
// mov ecx, dword [rel _tls_index] ; Load index (from C runtime)
// mov rcx, qword [rdx+rcx*8]
// mov eax, .tls$:tlsvar
// [rax+rcx] contains the address
// Windows 64bit: gs:0x58
// Windows 32bit: fs:__tls_array
SDLoc dl(GA);
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
// %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
// use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget.is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
SDValue TlsArray = Subtarget.is64Bit()
? DAG.getIntPtrConstant(0x58, dl)
: (Subtarget.isTargetWindowsGNU()
? DAG.getIntPtrConstant(0x2C, dl)
: DAG.getExternalSymbol("_tls_array", PtrVT));
SDValue ThreadPointer =
DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr));
SDValue res;
if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
res = ThreadPointer;
} else {
// Load the _tls_index variable
SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
if (Subtarget.is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
MachinePointerInfo(), MVT::i32);
else
IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo());
auto &DL = DAG.getDataLayout();
SDValue Scale =
DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT);
IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);
res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX);
}
res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo());
// Get the offset of the start of the .tls section.
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread-local variable is the sum of the thread
// pointer and the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
}
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getSimpleValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
// X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
// generic ISD nodes don't. Insert an AND to be safe; it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, dl, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, dl, MVT::i8))
: DAG.getConstant(0, dl, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
// If the shift amount is greater than or equal to the width of a part we
// can't rely on the results of shld/shrd. Insert a test and select the
// appropriate values for large shift amounts.
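// For example, for a 32-bit-part SHL_PARTS with ShAmt = 40, the CMOVs
// below select Hi = ShOpLo << (40 & 31) = ShOpLo << 8 and Lo = 0, since a
// single shld cannot produce the correct result for such amounts.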
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
AndNode, DAG.getConstant(0, dl, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
} else {
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
}
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
}
return SDValue();
}
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; return the operand so the caller accepts it as
// Legal.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Op;
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
Subtarget.is64Bit()) {
return Op;
}
SDValue ValueToStore = Op.getOperand(0);
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
!Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
SDLoc DL(Op);
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
unsigned ByteSize = SrcVT.getSizeInBits()/8;
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
MachineMemOperand *MMO;
if (FI) {
int SSFI = FI->getIndex();
MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
}
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
Tys, Ops, SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = Op.getValueSizeInBits()/8;
int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false);
auto PtrVT = getPointerTy(MF.getDataLayout());
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, Op.getValueType(), MMO);
Result = DAG.getLoad(
Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
}
return Result;
}
/// 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
// This algorithm is not obvious. Here is what we're trying to output:
/*
movq %rax, %xmm0
punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
#ifdef __SSE3__
haddpd %xmm0, %xmm0
#else
pshufd $0x4e, %xmm0, %xmm1
addpd %xmm1, %xmm0
#endif
*/
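// The trick: punpckldq interleaves the two 32-bit halves of the input with
// the exponent words 0x43300000 and 0x45300000, producing the doubles
//   d0 = 2^52 + lo   and   d1 = 2^84 + hi * 2^32
// (both exact, since each half fits in the 52-bit mantissa). Subtracting
// c1 = { 2^52, 2^84 } leaves { lo, hi * 2^32 }, and the horizontal add
// combines them into the final value.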
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Op.getOperand(0));
SDValue CLod0 =
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue Unpck1 =
getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
SDValue CLod1 =
DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (Subtarget.hasSSE3()) {
// FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
SDValue Shuffle = DAG.getVectorShuffle(MVT::v4i32, dl, S2F, S2F, {2,3,0,1});
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0, dl));
}
/// 32-bit unsigned integer to float expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
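// The bias exploits the f64 layout: OR-ing the zero-extended 32-bit value
// into the mantissa of 2^52 yields exactly 2^52 + x, so subtracting the
// bias 2^52 (bits 0x4330000000000000) recovers x as an exact double.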
// Load the 32-bit value into an XMM register.
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
Op.getOperand(0));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Load),
DAG.getIntPtrConstant(0, dl));
// Or the load with the bias.
SDValue Or = DAG.getNode(
ISD::OR, dl, MVT::v2i64,
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)),
DAG.getBitcast(MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
Or =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
MVT DestVT = Op.getSimpleValueType();
if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
DAG.getIntPtrConstant(0, dl));
if (DestVT.bitsGT(MVT::f64))
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
// Handle final rounding.
return Sub;
}
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget, SDLoc &DL) {
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
SDValue N0 = Op.getOperand(0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
// Legalize to v4i32 type.
N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
if (Subtarget.hasAVX512())
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
// Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
// but using v2i32 to v2f64 with X86ISD::CVTSI2P.
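// Writing the input as v = hi * 2^16 + lo with 16-bit halves, both halves
// convert exactly via the signed CVTSI2P (each is below 2^16, hence
// non-negative), and fHI * 65536.0 + fLO reassembles v in f64 without
// rounding.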
SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
// Two to the power of half-word-size.
SDValue TWOHW = DAG.getConstantFP(1 << 16, DL, MVT::v2f64);
// Clear the upper half of LO and shift out the lower half of HI.
SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask);
SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI);
fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW);
SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO);
// Add the two halves.
return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO);
}
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// The algorithm is the following:
// #ifdef __SSE4_1__
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
// #else
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
// uint4 hi = (v >> 16) | (uint4) 0x53000000;
// #endif
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// return (float4) lo + fhi;
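// For example, v = 0x80000001 gives lo = 2^23 + 1 and hi = 2^39 + 2^31 as
// floats, so fhi = hi - (2^39 + 2^23) = 2^31 - 2^23 and the final FADD
// computes (2^23 + 1) + (2^31 - 2^23) = 2^31 + 1, correctly rounded to the
// nearest float.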
// We shouldn't use it when unsafe-fp-math is enabled though: we might later
// reassociate the two FADDs, and if we do that, the algorithm fails
// spectacularly (PR24512).
// FIXME: If we ever have some kind of Machine FMF, this should be marked
// as non-fast and always be enabled. Why isn't SDAG FMF enough? Because
// there's also the MachineCombiner reassociations happening on Machine IR.
if (DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
SDLoc DL(Op);
SDValue V = Op->getOperand(0);
MVT VecIntVT = V.getSimpleValueType();
bool Is128 = VecIntVT == MVT::v4i32;
MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something other than the supported type, e.g., to v4f64,
// abort early.
if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
"Unsupported custom type");
// In the #ifdef/#else code, we have in common:
// - The vector of constants:
// -- 0x4b000000
// -- 0x53000000
// - A shift:
// -- v >> 16
// Create the splat vector for 0x4b000000.
SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT);
// Create the splat vector for 0x53000000.
SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT);
// Create the right shift.
SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT);
SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);
SDValue Low, High;
if (Subtarget.hasSSE41()) {
MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift);
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow);
// uint4 hi = (v >> 16) | (uint4) 0x53000000;
High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
}
// Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
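// (0xD3000080 encodes -(2^39 + 2^23) in IEEE single precision:
//  sign = 1, biased exponent = 0xA6 = 127 + 39, mantissa = 0x80,
//  i.e. -(1 + 2^-16) * 2^39 = -(2^39 + 2^23).)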
SDValue VecCstFAdd = DAG.getConstantFP(
APFloat(APFloat::IEEEsingle(), APInt(32, 0xD3000080)), DL, VecFloatVT);
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
// TODO: Are there any fast-math-flags to propagate here?
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low);
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
switch (SrcVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");
case MVT::v4i8:
case MVT::v4i16:
case MVT::v8i8:
case MVT::v8i16: {
MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
case MVT::v2i32:
return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl);
case MVT::v4i32:
case MVT::v8i32:
return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
case MVT::v16i8:
case MVT::v16i16:
assert(Subtarget.hasAVX512());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
}
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
(SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
// using VCVTUSI2SS/SD. Same for i64 in 64-bit mode.
return Op;
}
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo());
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo());
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue ValueToStore = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo());
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
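// For illustration: the fudge constant below, 0x5F800000, is 2^64 as an
// IEEE single. If the i64 input was negative, its unsigned value equals
// the signed value plus 2^64, so we add 2^64 to the FILD result;
// otherwise we add 0.0 (the other half of the constant-pool pair).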
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64-bit pair (0, FF) in the constant pool, with FF in the low bits.
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT);
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four);
FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
/* Alignment = */ 4);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
}
// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
// just return an <SDValue(), SDValue()> pair.
// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
// to i16, i32 or i64, and we lower it to a legal sequence.
// If lowered to the final integer result we return a <result, SDValue()> pair.
// Otherwise we lower it to a sequence ending with a FIST, return a
// <FIST, StackSlot> pair, and the caller is responsible for loading
// the final integer result from StackSlot.
std::pair<SDValue,SDValue>
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
// f16 must be promoted before using the lowering in this routine.
// fp128 does not use this lowering.
return std::make_pair(SDValue(), SDValue());
}
// If using FIST to compute an unsigned i64, we'll need some fixup
// to handle values above the maximum signed i64. A FIST is always
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
bool UnsignedFixup = !IsSigned &&
DstTy == MVT::i64 &&
(!Subtarget.is64Bit() ||
!isScalarFPTypeInSSEReg(TheVT));
if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// These are really Legal.
if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
if (Subtarget.is64Bit() &&
DstTy == MVT::i64 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
// We lower FP->int64 into FISTP64 followed by a load from a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
if (UnsignedFixup) {
//
// Conversion to unsigned i64 is implemented with a select,
// depending on whether the source value fits in the range
// of a signed i64. Let Thresh be the FP equivalent of
// 0x8000000000000000ULL.
//
// Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
// FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
// Fist-to-mem64 FistSrc
// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
// to XOR'ing the high 32 bits with Adjust.
//
// Being a power of 2, Thresh is exactly representable in all FP formats.
// For X87 we'd like to use the smallest FP type for this constant, but
// for DAG type consistency we have to match the FP operand type.
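// Worked example (illustrative): for Value = 2^63 + 42.0, the compare
// against Thresh = 2^63 fails, so Adjust = 0x80000000 and the FIST
// converts Value - 2^63 = 42.0, storing 42. XOR'ing the high 32 bits
// of the result with Adjust then yields 0x8000000000000000 + 42, the
// correct unsigned value.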
APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&LosesInfo);
else if (TheVT == MVT::f80)
Status = Thresh.convert(APFloat::x87DoubleExtended(),
APFloat::rmNearestTiesToEven, &LosesInfo);
assert(Status == APFloat::opOK && !LosesInfo &&
"FP conversion should have been exact");
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
SDValue Cmp = DAG.getSetCC(DL,
getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0x80000000, DL, MVT::i32));
SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
}
// FIXME: This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the call stack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI));
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
if (UnsignedFixup) {
// Insert the FIST, load its result as two i32's,
// and XOR the high i32 with Adjust.
SDValue FistOps[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
FistOps, DstTy, MMO);
SDValue Low32 =
DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo());
SDValue HighAddr = DAG.getMemBasePlusOffset(StackSlot, 4, DL);
SDValue High32 =
DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo());
High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
if (Subtarget.is64Bit()) {
// Join High32 and Low32 into a 64-bit result.
// (High32 << 32) | Low32
Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
DAG.getConstant(32, DL, MVT::i8));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
return std::make_pair(Result, SDValue());
}
SDValue ResultOps[] = { Low32, High32 };
SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
: DAG.getMergeValues(ResultOps, DL);
return std::make_pair(pair, SDValue());
} else {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
}
}
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In);
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
// Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
// Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
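// For example (illustrative), zero-extending v8i16 on AVX1:
//   OpLo = vpunpcklwd(In, Zero) interleaves the four low words with
//   zeros, and OpHi = vpunpckhwd(In, Zero) does the same for the high
//   words; each half, bitcast to v4i32, holds zero-extended elements.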
if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
if (Subtarget.hasInt256())
return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getBitcast(HVT, OpLo);
OpHi = DAG.getBitcast(HVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1 &&
(NumElts == 8 || NumElts == 16 || Subtarget.hasBWI()))
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
if (InVT.getVectorElementType() != MVT::i1)
return SDValue();
// Extend VT if the target type is a 256- or 128-bit vector and VLX is not supported.
MVT ExtVT = VT;
if (!VT.is512BitVector() && !Subtarget.hasVLX())
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
SDValue One =
DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT);
SDValue Zero =
DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero);
if (VT == ExtVT)
return SelectedVal;
return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
}
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (Subtarget.hasFp256())
if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
return SDValue();
}
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
if (Subtarget.hasFp256())
if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
return Res;
assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements());
return SDValue();
}
/// Helper to recursively truncate vector elements in half with PACKSS.
/// It makes use of the fact that vector comparison results will be all-zeros
/// or all-ones to use (vXi8 PACKSS(vYi16, vYi16)) instead of matching types.
/// AVX2 (Int256) sub-targets require extra shuffling as the PACKSS operates
/// within each 128-bit lane.
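/// For example (illustrative): a v16i16 comparison result, where every
/// element is 0 or -1, packs to v16i8 with a single PACKSS, since
/// signed saturation maps 0 -> 0 and -1 -> -1 exactly.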
static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In,
const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Requires SSE2 but AVX512 has fast truncate.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
return SDValue();
EVT SrcVT = In.getValueType();
// No truncation required, we might get here due to recursive calls.
if (SrcVT == DstVT)
return In;
// We only support vector truncation to 128 bits or greater from a
// 256-bit or greater source.
if ((DstVT.getSizeInBits() % 128) != 0)
return SDValue();
if ((SrcVT.getSizeInBits() % 256) != 0)
return SDValue();
unsigned NumElems = SrcVT.getVectorNumElements();
assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
assert(SrcVT.getSizeInBits() > DstVT.getSizeInBits() && "Illegal truncation");
EVT PackedSVT =
EVT::getIntegerVT(*DAG.getContext(), SrcVT.getScalarSizeInBits() / 2);
// Extract lower/upper subvectors.
unsigned NumSubElts = NumElems / 2;
unsigned SrcSizeInBits = SrcVT.getSizeInBits();
SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
// 256-bit -> 128-bit truncate - PACKSS lower/upper 128-bit subvectors.
if (SrcVT.is256BitVector()) {
Lo = DAG.getBitcast(MVT::v8i16, Lo);
Hi = DAG.getBitcast(MVT::v8i16, Hi);
SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, Lo, Hi);
return DAG.getBitcast(DstVT, Res);
}
// AVX2: 512-bit -> 256-bit truncate - PACKSS lower/upper 256-bit subvectors.
// AVX2: 512-bit -> 128-bit truncate - PACKSS(PACKSS, PACKSS).
if (SrcVT.is512BitVector() && Subtarget.hasInt256()) {
Lo = DAG.getBitcast(MVT::v16i16, Lo);
Hi = DAG.getBitcast(MVT::v16i16, Hi);
SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v32i8, Lo, Hi);
// 256-bit PACKSS(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
// so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
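// Illustration: labelling the packed low/high 128-bit lane of ARGn as
// LOn/HIn, PACKSS produces the quadwords in the order (LO0, LO1, HI0,
// HI1), and the {0, 2, 1, 3} shuffle reorders them to (LO0, HI0, LO1,
// HI1), restoring the original element order.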
Res = DAG.getBitcast(MVT::v4i64, Res);
Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
if (DstVT.is256BitVector())
return DAG.getBitcast(DstVT, Res);
// If 512-bit -> 128-bit, truncate another stage.
EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems);
Res = DAG.getBitcast(PackedVT, Res);
return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget);
}
// Recursively pack lower/upper subvectors, concat result and pack again.
assert(SrcVT.getSizeInBits() >= 512 && "Expected 512-bit vector or greater");
EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems / 2);
Lo = truncateVectorCompareWithPACKSS(PackedVT, Lo, DL, DAG, Subtarget);
Hi = truncateVectorCompareWithPACKSS(PackedVT, Hi, DL, DAG, Subtarget);
PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget);
}
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");
// Shift LSB to MSB and use VPMOVB/W2M or TESTD/Q.
unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
if (InVT.getScalarSizeInBits() <= 16) {
if (Subtarget.hasBWI()) {
// Legal; will be selected to VPMOVB2M/VPMOVW2M.
// Shifting packed bytes is not supported natively, so bitcast to words.
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
ShiftNode = DAG.getBitcast(InVT, ShiftNode);
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
}
// Use TESTD/Q on the vector extended to packed dwords/qwords.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
"Unexpected vector type.");
unsigned NumElts = InVT.getVectorNumElements();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
ShiftInx = InVT.getScalarSizeInBits() - 1;
}
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
if (VT == MVT::i1) {
assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
"Invalid scalar TRUNCATE operation");
if (InVT.getSizeInBits() >= 32)
return SDValue();
In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
}
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
// word to byte only under BWI
if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
return DAG.getNode(X86ISD::VTRUNC, DL, VT,
getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG));
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
}
// Truncate with PACKSS if we are truncating a vector zero/all-bits result.
if (InVT.getScalarSizeInBits() == DAG.ComputeNumSignBits(In))
if (SDValue V = truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget))
return V;
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v8i32, In);
In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2, DL));
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
static const int ShufMask[] = {0, 2, 4, 6};
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
In = DAG.getBitcast(MVT::v32i8, In);
// The PSHUFB mask:
static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1,
16, 17, 20, 21, 24, 25, 28, 29,
-1, -1, -1, -1, -1, -1, -1, -1 };
In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
static const int ShufMask2[] = {0, 2, -1, -1};
In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
return DAG.getBitcast(VT, In);
}
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(0, DL));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
DAG.getIntPtrConstant(4, DL));
OpLo = DAG.getBitcast(MVT::v16i8, OpLo);
OpHi = DAG.getBitcast(MVT::v16i8, OpHi);
// The PSHUFB mask:
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
// The MOVLHPS Mask:
static const int ShufMask2[] = {0, 1, 4, 5};
SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
return DAG.getBitcast(MVT::v8i16, res);
}
// Handle truncation of V256 to V128 using shuffles.
if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
assert(Subtarget.hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
In = DAG.getBitcast(NVT, In);
SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
DAG.getIntPtrConstant(0, DL));
}
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32)));
}
return SDValue();
}
assert(!VT.isVector());
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
IsSigned, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (!FIST.getNode())
return Op;
if (StackSlot.getNode())
// Load the result.
return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());
// The node is the result.
return FIST;
}
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
In, DAG.getUNDEF(SVT)));
}
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
"Wrong opcode for lowering FABS or FNEG.");
bool IsFABS = (Op.getOpcode() == ISD::FABS);
// If this is a FABS and it has an FNEG user, bail out to fold the combination
// into an FNABS. We'll lower the FABS after that if it is still in use.
if (IsFABS)
for (SDNode *User : Op->uses())
if (User->getOpcode() == ISD::FNEG)
return Op;
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
MVT LogicVT;
MVT EltVT;
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
} else if (IsF128) {
// SSE instructions are used for optimized f128 logical operations.
LogicVT = MVT::f128;
EltVT = VT;
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
EltVT = VT;
}
unsigned EltBits = EltVT.getSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
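// (Illustratively, for f32: FABS ANDs with 0x7FFFFFFF to clear the sign
//  bit, FNEG XORs with 0x80000000 to flip it, and the folded FNABS ORs
//  with 0x80000000 to set it.)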
APInt MaskElt =
IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits);
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble() :
(IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
unsigned LogicOp =
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (VT.isVector() || IsF128)
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
// For the scalar case extend to a 128-bit vector, perform the logic op,
// and extract the scalar result back out.
Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue Mag = Op.getOperand(0);
SDValue Sign = Op.getOperand(1);
SDLoc dl(Op);
// If the sign operand is smaller, extend it first.
MVT VT = Op.getSimpleValueType();
if (Sign.getSimpleValueType().bitsLT(VT))
Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);
// And if it is bigger, shrink it first.
if (Sign.getSimpleValueType().bitsGT(VT))
Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl));
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
bool IsF128 = (VT == MVT::f128);
assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
"Unexpected type in LowerFCOPYSIGN");
MVT EltVT = VT.getScalarType();
const fltSemantics &Sem =
EltVT == MVT::f64 ? APFloat::IEEEdouble()
: (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
// Perform all scalar logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE.
// TODO: This isn't necessary. If we used scalar types, we might avoid some
// unnecessary splats, but we might miss load folding opportunities. Should
// this decision be based on OptimizeForSize?
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue SignMask = DAG.getConstantFP(
APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
SDValue MagMask = DAG.getConstantFP(
APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
if (IsFakeVector)
Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);
// Next, clear the sign bit from the first operand (magnitude).
// TODO: If we had general constant folding for FP logic ops, this check
// wouldn't be necessary.
SDValue MagBits;
if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
APFloat APF = Op0CN->getValueAPF();
APF.clearSign();
MagBits = DAG.getConstantFP(APF, dl, LogicVT);
} else {
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (IsFakeVector)
Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
}
// OR the magnitude value with the sign bit.
SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT OpVT = N0.getSimpleValueType();
assert((OpVT == MVT::f32 || OpVT == MVT::f64) &&
"Unexpected type for FGETSIGN");
// Lower ISD::FGETSIGN to (AND (X86ISD::MOVMSK ...) 1).
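// (MOVMSK copies the sign bit of each vector lane into the low bits of
//  a GPR; AND'ing with 1 then keeps only the sign of lane 0, which is
//  where SCALAR_TO_VECTOR placed N0.)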
MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64);
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0);
Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res);
Res = DAG.getZExtOrTrunc(Res, dl, VT);
Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT));
return Res;
}
// Check whether an OR'd tree is PTEST-able.
static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget.hasSSE41())
return SDValue();
if (!Op->hasOneUse())
return SDValue();
SDNode *N = Op.getNode();
SDLoc DL(N);
SmallVector<SDValue, 8> Opnds;
DenseMap<SDValue, unsigned> VecInMap;
SmallVector<SDValue, 8> VecIns;
EVT VT = MVT::Other;
// Recognize a special case where a vector is cast into a wide integer to
// test all 0s.
Opnds.push_back(N->getOperand(0));
Opnds.push_back(N->getOperand(1));
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
// BFS traverse all OR'd operands.
if (I->getOpcode() == ISD::OR) {
Opnds.push_back(I->getOperand(0));
Opnds.push_back(I->getOperand(1));
// Re-evaluate the number of nodes to be traversed.
e += 2; // 2 more nodes (LHS and RHS) are pushed.
continue;
}
// Quit if this is not an EXTRACT_VECTOR_ELT.
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// Quit if the index is not a constant.
SDValue Idx = I->getOperand(1);
if (!isa<ConstantSDNode>(Idx))
return SDValue();
SDValue ExtractedFromVec = I->getOperand(0);
DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
if (M == VecInMap.end()) {
VT = ExtractedFromVec.getValueType();
// Quit if not 128/256-bit vector.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
// Quit if not the same type.
if (VecInMap.begin() != VecInMap.end() &&
VT != VecInMap.begin()->first.getValueType())
return SDValue();
M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
VecIns.push_back(ExtractedFromVec);
}
M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
}
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Not extracted from 128-/256-bit vector.");
unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
for (DenseMap<SDValue, unsigned>::const_iterator
I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
// Quit if not all elements are used.
if (I->second != FullMask)
return SDValue();
}
MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
// If more than one full vector is evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
// Each iteration will OR 2 nodes and append the result until there is only
// 1 node left, i.e. the final OR'd value of all vectors.
SDValue LHS = VecIns[Slot];
SDValue RHS = VecIns[Slot + 1];
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
}
/// \brief return true if \c Op has a use that doesn't just read flags.
static bool hasNonFlagsUse(SDValue Op) {
for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
++UI) {
SDNode *User = *UI;
unsigned UOpNo = UI.getOperandNo();
if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
// Look past the truncate.
UOpNo = User->use_begin().getOperandNo();
User = *User->use_begin();
}
if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
!(User->getOpcode() == ISD::SELECT && UOpNo == 0))
return true;
}
return false;
}
// Emit KTEST instruction for bit vectors on AVX-512
static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Op.getOpcode() == ISD::BITCAST) {
auto hasKTEST = [&](MVT VT) {
unsigned SizeInBits = VT.getSizeInBits();
return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
(Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
};
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType().getSimpleVT();
if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
hasKTEST(Op0VT))
return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
}
return SDValue();
}
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1) {
SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
DAG.getConstant(0, dl, MVT::i8));
}
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO: {
// Check if we really need to set the Overflow flag.
// If NoSignedWrap is present, it is not actually needed.
switch (Op->getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SHL:
if (Op.getNode()->getFlags().hasNoSignedWrap())
break;
LLVM_FALLTHROUGH;
default:
NeedOF = true;
break;
}
break;
}
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
// Emit KTEST for bit vectors
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
unsigned Opcode = 0;
unsigned NumOperands = 0;
// Truncate operations may prevent the merge of the SETCC instruction
// and the arithmetic instruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
SDValue Arith = Op->getOperand(0);
// Both the trunc and the arithmetic op need to have one user each.
if (Arith->hasOneUse())
switch (Arith.getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
NeedTruncation = true;
ArithOp = Arith;
}
}
}
// Sometimes flags can be set either with an AND or with an SRL/SHL
// instruction. The SRL/SHL variant should be preferred for masks longer than this
// number of bits.
const int ShiftToAndMaxMaskWidth = 32;
const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
// in a match is a store, isel doesn't know how to remap non-chain non-flag
// uses of other nodes in the match, such as the ADD in this case. This
// leads to the ADD being left around and reselected, with the result being
// two adds in the output. Alas, even if none of our users are stores, that
// doesn't prove we're O.K. Ergo, if we have any parents that aren't
// CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
// climbing the DAG back to the root, and it doesn't seem to be worth the
// effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
UI->getOpcode() != ISD::SETCC &&
UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
// An add of one will be selected as an INC.
if (C->isOne() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
if (C->isAllOnesValue() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
}
}
// Otherwise use a regular EFLAGS-setting add.
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
case ISD::SHL:
case ISD::SRL:
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
if (ZeroCheck && Op->hasOneUse() &&
isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
unsigned ShAmt = Op->getConstantOperandVal(1);
if (ShAmt >= BitWidth) // Avoid undefined shifts.
break;
APInt Mask = ArithOp.getOpcode() == ISD::SRL
? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
: APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
break;
Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
DAG.getConstant(Mask, dl, VT));
}
break;
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better. However, AND should be
// preferred if the instruction can be combined into ANDN.
if (!hasNonFlagsUse(Op)) {
SDValue Op0 = ArithOp->getOperand(0);
SDValue Op1 = ArithOp->getOperand(1);
EVT VT = ArithOp.getValueType();
bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
// If we cannot select an ANDN instruction, check if we can replace
// AND+IMM64 with a shift before giving up. This is possible for masks
// like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag.
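// For example (illustrative): (x & 0xFF00000000000000) == 0 can be
// tested as (x >> 56) == 0, and (x & 0x00FFFFFFFFFFFFFF) == 0 as
// (x << 8) == 0; the shift sets ZF exactly when the masked value is
// zero, avoiding the 8-byte immediate.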
if (!isProperAndn) {
if (!ZeroCheck)
break;
assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
auto *CN = dyn_cast<ConstantSDNode>(Op1);
if (!CN)
break;
const APInt &Mask = CN->getAPIntValue();
if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
break; // Prefer TEST instruction.
unsigned BitWidth = Mask.getBitWidth();
unsigned LeadingOnes = Mask.countLeadingOnes();
unsigned TrailingZeros = Mask.countTrailingZeros();
if (LeadingOnes + TrailingZeros == BitWidth) {
assert(TrailingZeros < VT.getSizeInBits() &&
"Shift amount should be less than the type width");
MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
break;
}
unsigned LeadingZeros = Mask.countLeadingZeros();
unsigned TrailingOnes = Mask.countTrailingOnes();
if (LeadingZeros + TrailingOnes == BitWidth) {
assert(LeadingZeros < VT.getSizeInBits() &&
"Shift amount should be less than the type width");
MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
break;
}
break;
}
}
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
// Due to the ISEL shortcoming noted above, be conservative if this op is
// likely to be selected as part of a load-modify-store instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() == ISD::STORE)
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
if (!NeedTruncation && ZeroCheck) {
if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG))
return EFLAGS;
}
Opcode = X86ISD::OR;
break;
}
}
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
return SDValue(Op.getNode(), 1);
default:
default_case:
break;
}
// If we found that truncation is beneficial, perform the truncation and
// update 'Op'.
if (NeedTruncation) {
EVT VT = Op.getValueType();
SDValue WideVal = Op->getOperand(0);
EVT WideVT = WideVal.getValueType();
unsigned ConvertedOp = 0;
// Use a target machine opcode to prevent further DAGCombine
// optimizations that may separate the arithmetic operations
// from the setcc node.
switch (WideVal.getOpcode()) {
default: break;
case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
case ISD::AND: ConvertedOp = X86ISD::AND; break;
case ISD::OR: ConvertedOp = X86ISD::OR; break;
case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
}
if (ConvertedOp) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
}
}
}
if (Opcode == 0) {
// Emit KTEST for bit vectors
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
}
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);
SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
DAG.ReplaceAllUsesWith(Op, New);
return SDValue(New.getNode(), 1);
}
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (isNullConstant(Op1))
return EmitTest(Op0, X86CC, dl, DAG);
assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
"Unexpected comparison operation for MVT::i1 operands");
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Only promote the compare up to i32 if it is a 16-bit operation
// with an immediate. 16-bit immediates are to be avoided.
if ((Op0.getValueType() == MVT::i16 &&
(isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
!DAG.getMachineFunction().getFunction()->optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
}
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
Op0, Op1);
return SDValue(Sub.getNode(), 1);
}
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
/// Convert a comparison if required by the subtarget.
SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SelectionDAG &DAG) const {
// If the subtarget does not support the FUCOMI instruction, floating-point
// comparisons have to be converted.
if (Subtarget.hasCMov() ||
Cmp.getOpcode() != X86ISD::CMP ||
!Cmp.getOperand(0).getValueType().isFloatingPoint() ||
!Cmp.getOperand(1).getValueType().isFloatingPoint())
return Cmp;
// The instruction selector will select an FUCOM instruction instead of
// FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
// build an SDNode sequence that transfers the result from FPSW into EFLAGS:
// (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
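// Illustratively, the emitted sequence corresponds to:
//   fucom(p)        ; compare, condition codes land in FPSW
//   fnstsw %ax      ; copy FPSW into AX
//   sahf            ; load AH (FPSW bits 8..15) into EFLAGS
// which is what the SRL-by-8 and truncate model here.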
SDLoc dl(Cmp);
SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
// Some 64-bit targets lack SAHF support, but they do support FCOMI.
assert(Subtarget.hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
/// Check if replacement of SQRT with RSQRT should be disabled.
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
if (VT.isVector())
return Subtarget.hasFastVectorFSQRT();
return Subtarget.hasFastScalarFSQRT();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
EVT VT = Op.getValueType();
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// rsqrt estimate with refinement on x86 prior to FMA requires at least 16
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v8f32 && Subtarget.hasAVX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
UseOneConstNR = false;
return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
}
return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
EVT VT = Op.getValueType();
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// reciprocal estimate with refinement on x86 prior to FMA requires
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(VT == MVT::v8f32 && Subtarget.hasAVX())) {
// Enable estimate codegen with 1 refinement step for vector division.
// Scalar division estimates are disabled because they break too much
// real-world code. These defaults are intended to match GCC behavior.
if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
return SDValue();
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 1;
return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
}
return SDValue();
}
/// If we have at least two divisions that use the same divisor, convert to
/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
/// Helper for creating a X86ISD::SETCC node.
static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
SelectionDAG &DAG) {
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
}
/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
/// according to equal/not-equal condition code \p CC.
static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
// If Src is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reasons.
if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
// See if we can use the 32-bit instruction instead of the 64-bit one for a
// shorter encoding. Since the former takes the modulo 32 of BitNo and the
// latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
// known to be zero.
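// (Illustration: "bt %rax, $n" tests bit n % 64 while "bt %eax, $n"
//  tests bit n % 32, so truncating the source to i32 is only safe when
//  bit 5 of BitNo is known zero, as the MaskedValueIsZero check below
//  verifies.)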
if (Src.getValueType() == MVT::i64 &&
DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
// If the operand types disagree, extend the shift amount to match. Since
// BT ignores high bits (like shifts) we can use anyextend.
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return getSETCC(Cond, BT, dl , DAG);
}
/// Result of 'and' is compared against zero. Change to a BT node if possible.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
SDValue LHS, RHS;
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
if (isOneConstant(Op0.getOperand(0))) {
// If we looked past a truncate, check that it's only truncating away
// known zeros.
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
KnownBits Known;
DAG.computeKnownBits(Op0, Known);
if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
return SDValue();
}
LHS = Op1;
RHS = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
// Use BT if the immediate can't be encoded in a TEST instruction.
if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
LHS = AndLHS;
RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType());
}
}
if (LHS.getNode())
return getBitTestCondition(LHS, RHS, CC, dl, DAG);
return SDValue();
}
// Convert (truncate (srl X, N) to i1) to (bt X, N)
static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
"Expected TRUNCATE to i1 node");
if (Op.getOperand(0).getOpcode() != ISD::SRL)
return SDValue();
SDValue ShiftRight = Op.getOperand(0);
return getBitTestCondition(ShiftRight.getOperand(0), ShiftRight.getOperand(1),
CC, dl, DAG);
}
/// Result of 'and' or 'trunc to i1' is compared against zero.
/// Change to a BT node if possible.
SDValue X86TargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) const {
if (Op.getOpcode() == ISD::AND)
return LowerAndToBT(Op, CC, dl, DAG);
if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1)
return LowerTruncateToBT(Op, CC, dl, DAG);
return SDValue();
}
/// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
/// CMPs.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
SDValue &Op1) {
unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
// 0 - EQ
// 1 - LT
// 2 - LE
// 3 - UNORD
// 4 - NEQ
// 5 - NLT
// 6 - NLE
// 7 - ORD
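// (Values 0-7 map directly to SSE compare predicates; SETUEQ/SETONE
//  below return 8, which has no single predicate and is split into two
//  compares by the caller.)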
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ:
case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
return SSECC;
}
/// Break a 256-bit integer VSETCC into two new 128-bit ones and then
/// concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
SDValue CC = Op.getOperand(2);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected type for boolean compare operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0,
DAG.getConstant(-1, dl, VT));
SDValue NotOp1 = DAG.getNode(ISD::XOR, dl, VT, Op1,
DAG.getConstant(-1, dl, VT));
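// With i1 elements, signed and unsigned orders both reduce to the boolean
// order false < true, so e.g. (x > y) holds exactly when x = 1 and y = 0,
// which is x & ~y.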
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETEQ:
// (x == y) -> ~(x ^ y)
return DAG.getNode(ISD::XOR, dl, VT,
DAG.getNode(ISD::XOR, dl, VT, Op0, Op1),
DAG.getConstant(-1, dl, VT));
case ISD::SETNE:
// (x != y) -> (x ^ y)
return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1);
case ISD::SETUGT:
case ISD::SETGT:
// (x > y) -> (x & ~y)
return DAG.getNode(ISD::AND, dl, VT, Op0, NotOp1);
case ISD::SETULT:
case ISD::SETLT:
// (x < y) -> (~x & y)
return DAG.getNode(ISD::AND, dl, VT, NotOp0, Op1);
case ISD::SETULE:
case ISD::SETLE:
// (x <= y) -> (~x | y)
return DAG.getNode(ISD::OR, dl, VT, NotOp0, Op1);
case ISD::SETUGE:
case ISD::SETGE:
// (x >= y) -> (x | ~y)
return DAG.getNode(ISD::OR, dl, VT, Op0, NotOp1);
}
}
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert(VT.getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
unsigned Opc = 0;
bool Unsigned = false;
bool Swap = false;
unsigned SSECC;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: SSECC = 4; break;
case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
case ISD::SETULT: SSECC = 1; Unsigned = true; break;
case ISD::SETUGE: SSECC = 5; Unsigned = true; break; // NLT
case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
case ISD::SETULE: Unsigned = true; LLVM_FALLTHROUGH;
case ISD::SETLE: SSECC = 2; break;
}
if (Swap)
std::swap(Op0, Op1);
if (Opc)
return DAG.getNode(Opc, dl, VT, Op0, Op1);
Opc = Unsigned ? X86ISD::CMPMU : X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, dl, MVT::i8));
}
/// \brief Try to turn a VSETULT into a VSETULE by modifying its second
/// operand \p Op1. If non-trivial (for example because it's not constant)
/// return an empty value.
static SDValue ChangeVSETULTtoVSETULE(const SDLoc &dl, SDValue Op1,
SelectionDAG &DAG) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
if (!BV)
return SDValue();
MVT VT = Op1.getSimpleValueType();
MVT EVT = VT.getVectorElementType();
unsigned n = VT.getVectorNumElements();
SmallVector<SDValue, 8> ULTOp1;
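// For each lane, x u< C is equivalent to x u<= (C - 1) as long as C != 0,
// so build the decremented constant vector lane by lane.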
for (unsigned i = 0; i < n; ++i) {
ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
return SDValue();
// Avoid underflow.
APInt Val = Elt->getAPIntValue();
if (Val == 0)
return SDValue();
ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT));
}
return DAG.getBuildVector(VT, dl, ULTOp1);
}
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
unsigned Opc;
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16);
Opc = X86ISD::CMPM;
} else {
Opc = X86ISD::CMPP;
// The SSE/AVX packed FP comparison nodes are defined with a
// floating-point vector result that matches the operand type. This allows
// them to work with an SSE1 target (integer vector types are not legal).
VT = Op0.getSimpleValueType();
}
// In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
// emit two comparisons and a logic op to tie them together.
// TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
// available.
SDValue Cmp;
unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
if (SSECC == 8) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
if (Cond == ISD::SETUEQ) {
CC0 = 3; // UNORD
CC1 = 0; // EQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) :
static_cast<unsigned>(ISD::OR);
} else {
assert(Cond == ISD::SETONE);
CC0 = 7; // ORD
CC1 = 4; // NEQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) :
static_cast<unsigned>(ISD::AND);
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, dl, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, dl, MVT::i8));
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
// Handle all other FP comparisons here.
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, dl, MVT::i8));
}
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
// result type of SETCC. The bitcast is expected to be optimized away
// during combining/isel.
if (Opc == X86ISD::CMPP)
Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
return Cmp;
}
MVT VTOp0 = Op0.getSimpleValueType();
assert(VTOp0 == Op1.getSimpleValueType() &&
"Expected operands with same type!");
assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
"Invalid number of packed elements for source and destination!");
if (VT.is128BitVector() && VTOp0.is256BitVector()) {
// On non-AVX512 targets, a vector of MVT::i1 is promoted by the type
// legalizer to a wider vector type. In the case of 'vsetcc' nodes, the
// legalizer first checks whether the first operand of the setcc has a
// legal type. If so, it promotes the return type to that same type.
// Otherwise, the return type is promoted to the 'next legal type' which,
// for a vector of MVT::i1 is always a 128-bit integer vector type.
//
// We reach this code only if the following two conditions are met:
// 1. Both return type and operand type have been promoted to wider types
// by the type legalizer.
// 2. The original operand type has been promoted to a 256-bit vector.
//
// Note that condition 2 only applies to AVX targets.
SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond);
return DAG.getZExtOrTrunc(NewOp, dl, VT);
}
// The non-AVX512 code below works under the assumption that source and
// destination types are the same.
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
"Value types for source and destination must be the same!");
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntVSETCC(Op, DAG);
// Operands are boolean (vectors of i1)
MVT OpVT = Op1.getSimpleValueType();
if (OpVT.getVectorElementType() == MVT::i1)
return LowerBoolVSETCC_AVX512(Op, DAG);
// The result is boolean, but operands are int/float
if (VT.getVectorElementType() == MVT::i1) {
// In the AVX-512 architecture, setcc returns a mask with i1 elements,
// but there is no compare instruction for i8 and i16 elements in KNL.
// In this case use an SSE compare.
bool UseAVX512Inst =
(OpVT.is512BitVector() ||
OpVT.getScalarSizeInBits() >= 32 ||
(Subtarget.hasBWI() && Subtarget.hasVLX()));
if (UseAVX512Inst)
return LowerIntVSETCC_AVX512(Op, DAG);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
// Lower using XOP integer comparisons.
if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
switch (Cond) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
case ISD::SETULE:
case ISD::SETLE: CmpMode = 0x01; break;
case ISD::SETUGT:
case ISD::SETGT: CmpMode = 0x02; break;
case ISD::SETUGE:
case ISD::SETGE: CmpMode = 0x03; break;
case ISD::SETEQ: CmpMode = 0x04; break;
case ISD::SETNE: CmpMode = 0x05; break;
}
// Are we comparing unsigned or signed integers?
unsigned Opc =
ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CmpMode, dl, MVT::i8));
}
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integers, swapping operands and multiple
// operations may be required for some comparisons.
unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
: X86ISD::PCMPGT;
bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
Cond == ISD::SETGE || Cond == ISD::SETUGE;
bool Invert = Cond == ISD::SETNE ||
(Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
// If both operands are known non-negative, then an unsigned compare is the
// same as a signed compare and there's no need to flip signbits.
// TODO: We could check for more general simplifications here since we're
// computing known bits.
bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
!(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
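// Flipping the sign bit maps unsigned order onto signed order:
// x u< y iff (x ^ SignMask) s< (y ^ SignMask).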
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
bool HasMinMax =
(Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
(Subtarget.hasSSE2() && (VET == MVT::i8));
bool MinMax = false;
if (HasMinMax) {
switch (Cond) {
default: break;
case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
if (MinMax)
Swap = Invert = FlipSigns = false;
}
bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
bool Subus = false;
if (!MinMax && HasSubus) {
// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
// Op0 u<= Op1:
// t = psubus Op0, Op1
// pcmpeq t, <0..0>
switch (Cond) {
default: break;
case ISD::SETULT: {
// If the comparison is against a constant we can turn this into a
// setule. With psubus, setule does not require a swap. This is
// beneficial because the constant in the register is no longer
// clobbered as the destination, so it can be hoisted out of a loop.
// Only do this pre-AVX, since the vpcmp* forms are no longer destructive.
if (Subtarget.hasAVX())
break;
if (SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG)) {
Op1 = ULEOp1;
Subus = true; Invert = false; Swap = false;
}
break;
}
// Psubus is better than flip-sign because it requires no inversion.
case ISD::SETUGE: Subus = true; Invert = false; Swap = true; break;
case ISD::SETULE: Subus = true; Invert = false; Swap = false; break;
}
if (Subus) {
Opc = X86ISD::SUBUS;
FlipSigns = false;
}
}
if (Swap)
std::swap(Op0, Op1);
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
assert(Subtarget.hasSSE2() && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
SDValue SB;
if (FlipSigns) {
SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32);
} else {
SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32);
SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32);
SB = DAG.getBuildVector(MVT::v4i32, dl, {Sign, Zero, Sign, Zero});
}
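// The emulation below compares the high dwords signed and the low dwords
// unsigned, so for an already-signed 64-bit compare only the low dwords
// (elements 0 and 2) get their sign bits flipped; for an unsigned compare
// (FlipSigns) every dword is flipped.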
Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
// Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Create masks for only the low parts/high parts of the 64-bit integers.
static const int MaskHi[] = { 1, 1, 3, 3 };
static const int MaskLo[] = { 0, 0, 2, 2 };
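// Each mask broadcasts one dword of every 64-bit element into both dwords
// of its lane, so the following AND/OR combine the partial results
// per 64-bit lane.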
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getBitcast(VT, Result);
}
if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available, synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");
// First cast everything to the right type.
Op0 = DAG.getBitcast(MVT::v4i32, Op0);
Op1 = DAG.getBitcast(MVT::v4i32, Op1);
// Do the compare.
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
// Make sure the lower and upper halves are both all-ones.
static const int Mask[] = { 1, 0, 3, 2 };
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getBitcast(VT, Result);
}
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
if (MinMax)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
if (Subus)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
getZeroVector(VT, Subtarget, DAG, dl));
return Result;
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
// Lower (trunc (X >> N) to i1) to BT(X, N).
if (Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
return NewSetCC;
}
}
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
if (!Invert)
return Op0;
CCode = X86::GetOppositeBranchCondition(CCode);
SDValue SetCC = getSETCC(CCode, Op0.getOperand(1), dl, DAG);
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
return SetCC;
}
}
if (Op0.getValueType() == MVT::i1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (isOneConstant(Op1)) {
ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, dl, MVT::i1), NewCC);
}
if (!isNullConstant(Op1)) {
SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, Op1);
return DAG.getSetCC(dl, VT, Xor, DAG.getConstant(0, dl, MVT::i1), CC);
}
}
bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
X86::CondCode X86CC = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
SDValue SetCC = getSETCC(X86CC, EFLAGS, dl, DAG);
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
return SetCC;
}
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Carry = Op.getOperand(2);
SDValue Cond = Op.getOperand(3);
SDLoc DL(Op);
assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
// Recreate the carry if needed.
EVT CarryVT = Carry.getValueType();
APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getConstant(NegOne, DL, CarryVT));
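// Adding all-ones produces a hardware carry-out exactly when the incoming
// carry value is nonzero, which materializes CF for the SBB below.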
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG);
if (Op.getSimpleValueType() == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return SetCC;
}
/// Return true if the opcode is an X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
Opc == X86ISD::SAHF)
return true;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
return true;
return false;
}
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
SDValue VOp0 = V.getOperand(0);
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
}
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool AddTest = true;
SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
MVT VT = Op1.getSimpleValueType();
SDValue CC;
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available, or into VBLENDV when AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget.hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget.hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (SSECC != 8) {
if (Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
return DAG.getNode(VT.isVector() ? X86ISD::SELECT : X86ISD::SELECTS,
DL, VT, Cmp, Op1, Op2);
}
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, DL, MVT::i8));
// If we have AVX, we can use a variable vector select (VBLENDV) instead
// of 3 logic instructions for size savings and potentially speed.
// Unfortunately, there is no scalar form of VBLENDV.
// If either operand is a constant, don't try this. We can expect to
// optimize away at least one of the logic instructions later in that
// case, so that sequence would be faster than a variable blend.
// BLENDV was introduced with SSE 4.1, but the 2-register form implicitly
// uses XMM0 as the selection register. That may need just as many
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
if (Subtarget.hasAVX() &&
!isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
VSel, DAG.getIntPtrConstant(0, DL));
}
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
// AVX512 fallback is to lower selects of scalar floats to masked moves.
if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond);
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
SDValue Op1Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
Op1Scalar = ConvertI1VectorToInteger(Op1, DAG);
else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
Op1Scalar = Op1.getOperand(0);
SDValue Op2Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
Op2Scalar = ConvertI1VectorToInteger(Op2, DAG);
else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
Op2Scalar = Op2.getOperand(0);
if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond,
Op1Scalar, Op2Scalar);
if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, newSelect);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
DAG.getIntPtrConstant(0, DL));
}
}
if (VT == MVT::v4i1 || VT == MVT::v2i1) {
SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
// If the condition was updated, it's possible that the operands of the
// select were also updated (for example, EmitTest has a RAUW). Refresh
// the local references to the select operands in case they got stale.
Op1 = Op.getOperand(1);
Op2 = Op.getOperand(2);
}
}
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
// (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y
// (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y
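// For the last two rows: when (x & 1) == 1 the negation yields an all-ones
// mask, so (-(x & 1) & z) op y computes z op y; when (x & 1) == 0 the mask
// is zero and the result is simply y, matching both arms of the select.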
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
unsigned CondCode =
cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
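// NEG sets CF exactly when x != 0, and SETCC_CARRY (an sbb reg, reg)
// broadcasts CF into every bit, yielding the desired 0 or -1.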
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
return Res;
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = // Res = 0 or -1.
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
if (!isNullConstant(Op2))
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
} else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
Cmp.getOperand(0).getOpcode() == ISD::AND &&
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
SDValue CmpOp0 = Cmp.getOperand(0);
SDValue Src1, Src2;
// Returns true if Op2 is an XOR or OR operator and one of its operands
// equals Op1, i.e. the pair matches
// (a, a op b) or (b, a op b).
auto isOrXorPattern = [&]() {
if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
(Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
Src1 =
Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
Src2 = Op1;
return true;
}
return false;
};
if (isOrXorPattern()) {
SDValue Neg;
unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
// We need a mask of all zeros or all ones with the same size as the
// other operands.
if (CmpSz > VT.getSizeInBits())
Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
else if (CmpSz < VT.getSizeInBits())
Neg = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
DAG.getConstant(1, DL, VT));
else
Neg = CmpOp0;
SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Neg); // -(and (x, 0x1))
SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y
}
}
}
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
isOneConstant(Cond.getOperand(1)))
Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getSimpleValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
Opc == X86ISD::BT) { // FIXME
Cond = Cmp;
AddTest = false;
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, DL, MVT::i8);
AddTest = false;
}
if (AddTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
AddTest = false;
}
}
}
if (AddTest) {
CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
// a < b ? 0 : -1 -> RES = setcc_carry
// a >= b ? -1 : 0 -> RES = setcc_carry
// a >= b ? 0 : -1 -> RES = ~setcc_carry
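// SETCC_CARRY broadcasts CF into every bit of the result: an unsigned
// a < b leaves CF set after the SUB, so the table above follows directly,
// with the NOT handling the inverted cases.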
if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(isNullConstant(Op1) || isNullConstant(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
Cond);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
}
// X86 doesn't have an i8 cmov. If both operands are the result of a truncate,
// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.
if (Op.getValueType() == MVT::i8 &&
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
// Blacklist CopyFromReg to avoid partial register stalls.
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode() != ISD::CopyFromReg) {
SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
MVT VTElt = VT.getVectorElementType();
MVT InVTElt = InVT.getVectorElementType();
SDLoc dl(Op);
// SKX processor
if ((InVTElt == MVT::i1) &&
(((Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16)) ||
((Subtarget.hasDQI() && VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
unsigned NumElts = VT.getVectorNumElements();
if (VT.is512BitVector() && InVTElt != MVT::i1 &&
(NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG);
return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG);
}
if (InVTElt != MVT::i1)
return SDValue();
MVT ExtVT = VT;
if (!VT.is512BitVector() && !Subtarget.hasVLX())
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
SDValue V;
if (Subtarget.hasDQI()) {
V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG);
assert(!VT.is512BitVector() && "Unexpected vector type");
} else {
SDValue NegOne = getOnesVector(ExtVT, DAG, dl);
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
V = DAG.getSelect(dl, ExtVT, In, NegOne, Zero);
if (ExtVT == VT)
return V;
}
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
}
// Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG.
// For sign extend this needs to handle all vector sizes and SSE4.1 and
// non-SSE4.1 targets. For zero extend this should only handle inputs of
// MVT::v64i8 when BWI is not supported, but AVX512 is.
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
assert(VT.getSizeInBits() == InVT.getSizeInBits());
MVT SVT = VT.getVectorElementType();
MVT InSVT = InVT.getVectorElementType();
assert(SVT.getSizeInBits() > InSVT.getSizeInBits());
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
!(VT.is256BitVector() && Subtarget.hasInt256()) &&
!(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
// For 512-bit vectors, we need 128-bits or 256-bits.
if (VT.getSizeInBits() > 128) {
// Input needs to be at least the same number of elements as output, and
// at least 128 bits.
int InSize = InSVT.getSizeInBits() * VT.getVectorNumElements();
In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
}
assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
// SSE41 targets can use the pmovsx* instructions directly for 128-bit results,
// so those cases are legal and shouldn't occur here. AVX2/AVX512 pmovsx*
// instructions still need to be handled here for 256/512-bit results.
if (Subtarget.hasInt256()) {
assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
X86ISD::VSEXT : X86ISD::VZEXT;
return DAG.getNode(ExtOpc, dl, VT, In);
}
// We should only get here for sign extend.
assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
"Unexpected opcode!");
// Pre-SSE41 targets unpack the lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
MVT CurrVT = InVT;
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
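// For example, going from v16i8 to v8i16: unpacking with undef in the low
// position leaves each source byte in the high byte of a 16-bit lane, and
// the SRAI below then shifts it down arithmetically, replicating the sign
// bit into the low byte.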
while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
Curr = DAG.getBitcast(CurrVT, Curr);
}
SDValue SignExt = Curr;
if (CurrVT != InVT) {
unsigned SignExtShift =
CurrVT.getScalarSizeInBits() - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
if (CurrVT == VT)
return SignExt;
if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
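// Build a vector of sign words (each i32 shifted right arithmetically by
// 31) and interleave it with the sign-extended values ({0, 4, 1, 5}) to
// form the high halves of the i64 results.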
SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(31, dl, MVT::i8));
SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
return DAG.getBitcast(VT, Ext);
}
return SDValue();
}
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
(VT != MVT::v8i32 || InVT != MVT::v8i16) &&
(VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget.hasInt256())
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
// Optimize vectors in AVX mode:
// sign extend v8i16 to v8i32 and v4i32 to v4i64.
//
// Divide the input vector into two parts; for v4i32 the shuffle masks
// will be { 0, 1, -1, -1 } and { 2, 3, -1, -1 }. Then use the vpmovsx
// instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32, and finally
// concat the vectors back to the original VT.
unsigned NumElems = InVT.getVectorNumElements();
SDValue Undef = DAG.getUNDEF(InVT);
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
// Lower a truncating store. We need special lowering for vXi1 vectors.
static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
StoreSDNode *St = cast<StoreSDNode>(StOp.getNode());
SDLoc dl(St);
EVT MemVT = St->getMemoryVT();
assert(St->isTruncatingStore() && "We only custom lower truncating stores.");
assert(MemVT.isVector() && MemVT.getVectorElementType() == MVT::i1 &&
"Expected truncstore of i1 vector");
SDValue Op = St->getValue();
MVT OpVT = Op.getValueType().getSimpleVT();
unsigned NumElts = OpVT.getVectorNumElements();
if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
NumElts == 16) {
// Truncate and store - everything is legal
Op = DAG.getNode(ISD::TRUNCATE, dl, MemVT, Op);
if (MemVT.getSizeInBits() < 8)
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
DAG.getUNDEF(MVT::v8i1), Op,
DAG.getIntPtrConstant(0, dl));
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
// A subset: assume that we have only AVX-512F.
if (NumElts <= 8) {
if (NumElts < 8) {
// Extend to an 8-element vector.
MVT ExtVT = MVT::getVectorVT(OpVT.getScalarType(), 8);
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ExtVT,
DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl));
}
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op);
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
// v32i8
assert(OpVT == MVT::v32i8 && "Unexpected operand type");
// Divide the vector into 2 parts and store each part separately
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, Op,
DAG.getIntPtrConstant(0, dl));
Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Lo);
SDValue BasePtr = St->getBasePtr();
SDValue StLo = DAG.getStore(St->getChain(), dl, Lo, BasePtr,
St->getMemOperand());
SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, Op,
DAG.getIntPtrConstant(16, dl));
Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi);
SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
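// Each v16i1 half occupies 16 bits, hence the 2-byte offset to the
// second half.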
SDValue StHi = DAG.getStore(St->getChain(), dl, Hi,
BasePtrHi, St->getMemOperand());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
}
static SDValue LowerExtended1BitVectorLoad(SDValue Op,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
assert(MemVT.isVector() && MemVT.getScalarType() == MVT::i1 &&
"Expected i1 vector load");
unsigned ExtOpcode = Ld->getExtensionType() == ISD::ZEXTLOAD ?
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
MVT VT = Op.getValueType().getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
if ((Subtarget.hasBWI() && NumElts >= 32) ||
(Subtarget.hasDQI() && NumElts < 16) ||
NumElts == 16) {
// Load and extend - everything is legal
if (NumElts < 8) {
SDValue Load = DAG.getLoad(MVT::v8i1, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
SDValue Load = DAG.getLoad(MemVT, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Load);
}
if (NumElts <= 8) {
// A subset: assume that we have only AVX-512F.
unsigned NumBitsToLoad = 8;
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
SDValue BitVec = DAG.getBitcast(MaskVT, Load);
if (NumElts == 8)
return DAG.getNode(ExtOpcode, dl, VT, BitVec);
// We still need to handle v4i1 and v2i1 here.
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
assert(VT == MVT::v32i8 && "Unexpected extload type");
SmallVector<SDValue, 2> Chains;
SDValue BasePtr = Ld->getBasePtr();
SDValue LoadLo = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
Chains.push_back(LoadLo.getValue(1));
SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
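// As in the truncating-store case, each v16i1 half is 16 bits wide, so
// the high half lives at a 2-byte offset.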
SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
BasePtrHi,
Ld->getMemOperand());
Chains.push_back(LoadHi.getValue(1));
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
SDValue Lo = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadLo);
SDValue Hi = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v32i8, Lo, Hi);
}
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
// may emit an illegal shuffle but the expansion is still better than scalar
// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and an arithmetic shift.
// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector sext loads.");
assert(RegVT.isInteger() &&
"We only custom lower integer vector sext loads.");
// Nothing useful we can do without SSE2 shuffles.
assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
if (MemVT.getScalarType() == MVT::i1)
return LowerExtended1BitVectorLoad(Op, Subtarget, DAG);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned RegSz = RegVT.getSizeInBits();
ISD::LoadExtType Ext = Ld->getExtensionType();
assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
&& "Only anyext and sext are currently implemented.");
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) {
// The only way in which we have a legal 256-bit vector result but not the
// integer 256-bit operations needed to directly lower a sextload is if we
// have AVX1 but not AVX2. In that case, we can always emit a sextload to
// a 128-bit vector and a normal sign_extend to 256-bits that should get
// correctly legalized. We do this late to allow the canonical form of
// sextload to persist throughout the rest of the DAG combiner -- it wants
// to fold together any extensions it can, and so will fuse a sign_extend
// of an sextload into a sextload targeting a wider value.
SDValue Load;
if (MemSz == 128) {
// Just switch this to a normal load.
assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
"it must be a legal 128-bit vector "
"type!");
Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
} else {
assert(MemSz < 128 &&
"Can't extend a type wider than 128 bits to a 256 bit vector!");
// Do an sext load to a 128-bit vector type. We want to use the same
// number of elements, but elements half as wide. This will end up being
// recursively lowered by this routine, but will succeed as we definitely
// have all the necessary features if we're using AVX1.
EVT HalfEltVT =
EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
Load =
DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
}
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
// Finally, do a normal sign-extend to the desired register.
return DAG.getSExtOrTrunc(Load, dl, RegVT);
}
// All sizes must be a power of two.
assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
"Non-power-of-two elements are not custom lowered!");
// Attempt to load the original value using scalar loads.
// Find the largest scalar type that divides the total loaded size.
MVT SclrLoadTy = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
SclrLoadTy = Tp;
}
}
// On 32-bit systems, 64-bit integers aren't legal; try bitcasting to f64.
if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
(64 <= MemSz))
SclrLoadTy = MVT::f64;
// Calculate the number of scalar loads that we need to perform
// in order to load our vector from memory.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
"Can only lower sext loads with a single scalar load!");
unsigned loadRegSize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz >= 256)
loadRegSize = 128;
// Represent our vector as a sequence of elements of the largest scalar
// type that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(
*DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we "widen" MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegSize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
// We can't shuffle using an illegal type.
assert(TLI.isTypeLegal(WideVecVT) &&
"We only lower types that form legal widened vector types");
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
// Perform a single load.
SDValue ScalarLoad =
DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Ld->getAlignment(), Ld->getMemOperand()->getFlags());
Chains.push_back(ScalarLoad.getValue(1));
// Create the first element type using SCALAR_TO_VECTOR in order to avoid
// another round of DAGCombining.
if (i == 0)
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
else
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
ScalarLoad, DAG.getIntPtrConstant(i, dl));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res);
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
// If we have SSE4.1, we can directly emit a VSEXT node.
if (Subtarget.hasSSE41()) {
SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Sext;
}
// Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
// lanes.
assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
"We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
// Redistribute the loaded elements into the different locations.
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i * SizeRatio] = i;
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
DAG.getUNDEF(WideVecVT), ShuffleVec);
// Bitcast to the requested type.
Shuff = DAG.getBitcast(RegVT, Shuff);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
/// Return true if the node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes
/// each of which has no other use apart from the AND / OR.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
Opc = Op.getOpcode();
if (Opc != ISD::OR && Opc != ISD::AND)
return false;
return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse() &&
Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
Op.getOperand(1).hasOneUse());
}
/// Return true if the node is an ISD::XOR of an X86ISD::SETCC and 1, where the
/// SETCC node has a single use.
static bool isXor1OfSetCC(SDValue Op) {
if (Op.getOpcode() != ISD::XOR)
return false;
if (isOneConstant(Op.getOperand(1)))
return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
Op.getOperand(0).hasOneUse();
return false;
}
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
SDValue CC;
bool Inverted = false;
if (Cond.getOpcode() == ISD::SETCC) {
// Check for setcc([su]{add,sub,mul}o == 0).
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
isNullConstant(Cond.getOperand(1)) &&
Cond.getOperand(0).getResNo() == 1 &&
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
Cond.getOperand(0).getOpcode() == ISD::USUBO ||
Cond.getOperand(0).getOpcode() == ISD::SMULO ||
Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
Inverted = true;
Cond = Cond.getOperand(0);
} else {
if (SDValue NewCond = LowerSETCC(Cond, DAG))
Cond = NewCond;
}
}
#if 0
// FIXME: LowerXALUO doesn't handle these!!
else if (Cond.getOpcode() == X86ISD::ADD ||
Cond.getOpcode() == X86ISD::SUB ||
Cond.getOpcode() == X86ISD::SMUL ||
Cond.getOpcode() == X86ISD::UMUL)
Cond = LowerXALUO(Cond, DAG);
#endif
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
isOneConstant(Cond.getOperand(1)))
Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
// FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
Cond = Cmp;
addTest = false;
} else {
switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
default: break;
case X86::COND_O:
case X86::COND_B:
// These can only come from an arithmetic instruction with overflow,
// e.g. SADDO, UADDO.
Cond = Cond.getOperand(1);
addTest = false;
break;
}
}
}
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
// Keep this in sync with LowerXALUO, otherwise we might create redundant
// instructions that can't be removed afterwards (i.e. X86ISD::ADD and
// X86ISD::INC).
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO:
if (isOneConstant(RHS)) {
X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO:
if (isOneConstant(RHS)) {
X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (Inverted)
X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
addTest = false;
} else {
unsigned CondOpc;
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
SDValue Cmp = Cond.getOperand(0).getOperand(1);
if (CondOpc == ISD::OR) {
// Also, recognize the pattern generated by an FCMP_UNE. We can emit
// two branches instead of an explicit OR instruction with a
// separate test.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp)) {
CC = Cond.getOperand(0).getOperand(0);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = Cond.getOperand(1).getOperand(0);
Cond = Cmp;
addTest = false;
}
} else { // ISD::AND
// Also, recognize the pattern generated by an FCMP_OEQ. We can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp) &&
Op.getNode()->hasOneUse()) {
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, dl, MVT::i8);
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
}
} else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
// Recognize the pattern xorb (setcc), 1. The xor inverts the condition.
// It should be transformed by the DAG combiner, except when the condition
// is set by an arithmetic-with-overflow node.
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, dl, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
// For FCMP_OEQ, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
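// A sketch of the emitted code (e.g. for f32 operands, which typically
// lower to ucomiss):
//   ucomiss %xmm1, %xmm0
//   jne FalseBB
//   jp  FalseBB
//   jmp TrueBB   ; may become a fall-through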
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_P, dl, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// For FCMP_UNE, we can emit
// two branches instead of an explicit AND instruction with a
// separate test. However, we only do this if this block doesn't
// have a fall-through edge, because this requires an explicit
// jmp when the condition is false.
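// A sketch of the emitted code (again assuming a ucomiss comparison):
//   ucomiss %xmm1, %xmm0
//   jne TrueBB
//   jnp FalseBB
//   jmp TrueBB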
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_UNE.
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8);
Cond = Cmp;
addTest = false;
Dest = FalseBB;
}
}
}
}
if (addTest) {
// Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// We know the result is compared against zero. Try to match it to BT.
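// For example (a sketch): (and x, (shl 1, n)) != 0 can be rewritten as a
// BT node plus a COND_B condition, i.e. roughly bt n, x ; jb TrueBB.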
if (Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
}
}
}
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getConstant(X86Cond, dl, MVT::i8);
Cond = EmitTest(Cond, X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
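// For example (a sketch): a single 12 KB allocation must not simply do
//   sub $12288, %rsp
// since that could jump past a guard page untouched; the probe routine
// instead touches the stack once per 4 KB page on the way down.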
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool EmitStackProbe = !getStackProbeSymbolName(MF).empty();
bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
SplitStack || EmitStackProbe;
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
EVT VT = Node->getValueType(0);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
bool Is64Bit = Subtarget.is64Bit();
MVT SPTy = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Result = DAG.getNode(ISD::AND, dl, VT, Result,
DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
} else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
// The 64-bit implementation of segmented stacks needs to clobber both r10
// and r11. This makes it impossible to use along with nested parameters.
const Function *F = MF.getFunction();
for (const auto &A : F->args()) {
if (A.hasNestAttr())
report_fatal_error("Cannot use segmented stacks with functions that "
"have nested arguments.");
}
}
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
if (Align) {
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align, dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
Result = SP;
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {Result, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
if (!Subtarget.is64Bit() ||
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
// __va_list_tag:
// gp_offset (0 - 6 * 8)
// fp_offset (48 - 48 + 8 * 16)
// overflow_arg_area (points to parameters passed in memory).
// reg_save_area
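// A C-level sketch of that layout (SysV AMD64 ABI):
//   struct __va_list_tag {
//     unsigned gp_offset;       // byte offset 0
//     unsigned fp_offset;       // byte offset 4
//     void *overflow_arg_area;  // byte offset 8
//     void *reg_save_area;      // byte offset 16
//   };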
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
SDValue Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
MachinePointerInfo(SV));
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getMemBasePlusOffset(FIN, 4, DL);
Store = DAG.getStore(
Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
MachinePointerInfo(SV, 4));
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
Store =
DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8));
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(
Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
assert(Op.getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()))
// The Win64 ABI uses char* instead of a structure.
return DAG.expandVAArg(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
unsigned Align = Op.getConstantOperandVal(3);
SDLoc dl(Op);
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
// TODO: Implement the AMD64 ABI in its entirety. This simple
// selection mechanism works only for the basic types.
if (ArgVT == MVT::f80) {
llvm_unreachable("va_arg for f80 not yet implemented");
} else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
} else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
} else {
llvm_unreachable("Unhandled argument type in LowerVAARG");
}
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!Subtarget.useSoftFloat() &&
!(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget.hasSSE1());
}
// Insert VAARG_64 node into the DAG
// VAARG_64 returns two values: Variable Argument Address, Chain
SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32),
DAG.getConstant(ArgMode, dl, MVT::i8),
DAG.getConstant(Align, dl, MVT::i32)};
SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
VTs, InstOps, MVT::i64,
MachinePointerInfo(SV),
/*Align=*/0,
/*Volatile=*/false,
/*ReadMem=*/true,
/*WriteMem=*/true);
Chain = VAARG.getValue(1);
// Load the next argument and return it
return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo());
}
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
// where a va_list is still an i8*.
assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
if (Subtarget.isCallingConvWin64(
DAG.getMachineFunction().getFunction()->getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
SDLoc DL(Op);
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false,
false, false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
/// Handle vector element shifts where the shift amount is a constant.
/// Takes immediate version of shift as input.
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, uint64_t ShiftAmt,
SelectionDAG &DAG) {
MVT ElementType = VT.getVectorElementType();
// Bitcast the source vector to the output type, this is mainly necessary for
// vXi8/vXi64 shifts.
if (VT != SrcOp.getSimpleValueType())
SrcOp = DAG.getBitcast(VT, SrcOp);
// Fold this packed shift into its first operand if ShiftAmt is 0.
if (ShiftAmt == 0)
return SrcOp;
// Check for ShiftAmt >= element width
if (ShiftAmt >= ElementType.getSizeInBits()) {
if (Opc == X86ISD::VSRAI)
ShiftAmt = ElementType.getSizeInBits() - 1;
else
return DAG.getConstant(0, dl, VT);
}
assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
&& "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
// vector of Constants or UNDEFs.
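// For example (a sketch): VSHLI <i32 1, i32 2, undef, i32 4> by 3 folds to
// the build_vector <i32 8, i32 16, undef, i32 32>.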
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
SmallVector<SDValue, 8> Elts;
unsigned NumElts = SrcOp->getNumOperands();
ConstantSDNode *ND;
switch(Opc) {
default: llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRLI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRAI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
}
break;
}
return DAG.getBuildVector(VT, dl, Elts);
}
return DAG.getNode(Opc, dl, VT, SrcOp,
DAG.getConstant(ShiftAmt, dl, MVT::i8));
}
/// Handle vector element shifts where the shift amount may or may not be a
/// constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
// Catch shift-by-constant.
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
default: llvm_unreachable("Unknown target vector shift node");
case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64 bits of the shift count.
// +=================+============+=======================================+
// | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
// +=================+============+=======================================+
// | i64 | Yes, No | Use ShAmt as lowest elt |
// | i32 | Yes | zero-extend in-reg |
// | (i32 zext(i16)) | Yes | zero-extend in-reg |
// | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud) |
// +=================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
ShAmt = ShAmt.getOperand(0);
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
/// \brief Return Mask with the necessary casting or extending
/// for \p Mask according to \p MaskVT when lowering masking intrinsics
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl) {
if (isAllOnesConstant(Mask))
return DAG.getTargetConstant(1, dl, MaskVT);
if (X86::isZeroNode(Mask))
return DAG.getTargetConstant(0, dl, MaskVT);
if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
// Mask should be extended
Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
}
if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
if (MaskVT == MVT::v64i1) {
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
// In 32-bit mode a bitcast of i64 is illegal; extend/split it instead.
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(0, dl, MVT::i32));
Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
DAG.getConstant(1, dl, MVT::i32));
Lo = DAG.getBitcast(MVT::v32i1, Lo);
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
// MaskVT requires < 64 bits. Truncate the mask (should succeed in any case),
// and bitcast.
MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits());
return DAG.getBitcast(MaskVT,
DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask));
}
} else {
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
// When MaskVT equals v2i1 or v4i1, the low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
}
}
/// \brief Return (and \p Op, \p Mask) for compare instructions or
/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
/// necessary casting or extending for \p Mask when lowering masking intrinsics
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
unsigned OpcodeSelect = ISD::VSELECT;
SDLoc dl(Op);
if (isAllOnesConstant(Mask))
return Op;
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
switch (Op.getOpcode()) {
default: break;
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
case X86ISD::CMPMU:
return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
case X86ISD::VFPCLASS:
case X86ISD::VFPCLASSS:
return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS:
case X86ISD::CVTPS2PH:
// We can't use ISD::VSELECT here because it is not always "Legal"
// for the destination type. For example, vpmovqb requires only AVX512,
// while a vselect that operates on byte elements requires BWI.
OpcodeSelect = X86ISD::SELECT;
break;
}
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}
/// \brief Creates an SDNode for a predicated scalar operation.
/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
/// The mask comes in as MVT::i8 and should be transformed
/// to MVT::v1i1 while lowering masking intrinsics.
/// The main difference between ScalarMaskingNode and VectorMaskingNode is using
/// "X86select" instead of "vselect". We just can't create the "vselect" node
/// for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
if (MaskConst->getZExtValue() & 0x1)
return Op;
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask);
if (Op.getOpcode() == X86ISD::FSETCCM ||
Op.getOpcode() == X86ISD::FSETCCM_RND)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
}
static int getSEHRegistrationNodeSize(const Function *Fn) {
if (!Fn->hasPersonalityFn())
report_fatal_error(
"querying registration node size for function without personality");
// The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
// WinEHStatePass for the full struct definition.
switch (classifyEHPersonality(Fn->getPersonalityFn())) {
case EHPersonality::MSVC_X86SEH: return 24;
case EHPersonality::MSVC_CXX: return 16;
default: break;
}
report_fatal_error(
"can only recover FP for 32-bit MSVC EH personality functions");
}
/// When the MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
/// Here's the math:
/// RegNodeBase = EntryEBP - RegNodeSize
/// ParentFP = RegNodeBase - ParentFrameOffset
/// Subtracting RegNodeSize takes us to the offset of the registration node, and
/// subtracting the offset (negative on x86) takes us back to the parent FP.
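/// A worked example with hypothetical values, say RegNodeSize = 16 (C++ EH)
/// and ParentFrameOffset = -64:
///   RegNodeBase = EntryEBP - 16
///   ParentFP    = RegNodeBase - (-64) = EntryEBP + 48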
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
SDValue EntryEBP) {
MachineFunction &MF = DAG.getMachineFunction();
SDLoc dl;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// It's possible that the parent function no longer has a personality function
// if the exceptional code was optimized away, in which case we just return
// the incoming EBP.
if (!Fn->hasPersonalityFn())
return EntryEBP;
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
// registration, or the .set_setframe offset.
MCSymbol *OffsetSym =
MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
SDValue ParentFrameOffset =
DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
// Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
// prologue to RBP in the parent function.
const X86Subtarget &Subtarget =
static_cast<const X86Subtarget &>(DAG.getSubtarget());
if (Subtarget.is64Bit())
return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// RegNodeBase = EntryEBP - RegNodeSize
// ParentFP = RegNodeBase - ParentFrameOffset
SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
DAG.getConstant(RegNodeSize, dl, PtrVT));
return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// Helper to detect if the operand is CUR_DIRECTION rounding mode.
auto isRoundModeCurDirection = [](SDValue Rnd) {
if (!isa<ConstantSDNode>(Rnd))
return false;
unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
};
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
switch(IntrData->Type) {
case INTR_TYPE_1OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case INTR_TYPE_4OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
// We always add rounding mode to the Node.
// If the rounding mode is not specified, we add the
// "current direction" mode.
if (Op.getNumOperands() == 4)
RoundingMode =
DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
else
RoundingMode = Op.getOperand(4);
assert(IntrData->Opc1 == 0 && "Unexpected second opcode!");
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// We add rounding mode to the Node when
// - RM Opcode is specified and
// - RM is not "current direction".
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, VT, Src1, Src2, Rnd),
Mask, passThru, Subtarget, DAG);
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src0 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// There are 2 kinds of intrinsics in this group:
// (1) With suppress-all-exceptions (sae) or rounding mode - 6 operands.
// (2) With both rounding mode and sae - 7 operands.
if (Op.getNumOperands() == 6) {
SDValue Sae = Op.getOperand(5);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
Sae),
Mask, Src0, Subtarget, DAG);
}
assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
SDValue RoundingMode = Op.getOperand(5);
SDValue Sae = Op.getOperand(6);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
RoundingMode, Sae),
Mask, Src0, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK:
case INTR_TYPE_2OP_IMM8_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK)
Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
// TODO: Intrinsics should have fast-math-flags to propagate.
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// We specify 2 possible modes for intrinsics, with/without rounding
// modes.
// First, we check if the intrinsic has a rounding mode (6 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
if (Op.getNumOperands() == 6)
Rnd = Op.getOperand(5);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Sae = Op.getOperand(6);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2, Src3, Sae),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Imm = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
// We specify 2 possible modes for intrinsics, with/without rounding
// modes.
// First, we check if the intrinsic has a rounding mode (7 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
if (Op.getNumOperands() == 7)
Rnd = Op.getOperand(6);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Imm, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(6);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case VPERM_2OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1),
Mask, PassThru, Subtarget, DAG);
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK: {
MVT VT = Op.getSimpleValueType();
// Src2 is the PassThru
SDValue Src1 = Op.getOperand(1);
// PassThru needs to be the same type as the destination in order
// to pattern match correctly.
SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue PassThru = SDValue();
// Set the PassThru element.
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
PassThru = Src2;
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src2, Src1, Src3),
Mask, PassThru, Subtarget, DAG);
}
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// Set the PassThru element.
if (IntrData->Type == FMA_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else if (IntrData->Type == FMA_OP_MASK3)
PassThru = Src3;
else
PassThru = Src1;
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
case FMA_OP_SCALAR_MASK:
case FMA_OP_SCALAR_MASK3:
case FMA_OP_SCALAR_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// Set the PassThru element.
if (IntrData->Type == FMA_OP_SCALAR_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else if (IntrData->Type == FMA_OP_SCALAR_MASK3)
PassThru = Src3;
else
PassThru = Src1;
SDValue Rnd = Op.getOperand(5);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl,
Op.getValueType(), Src1, Src2,
Src3, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case TERLOG_OP_MASK:
case TERLOG_OP_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
SDValue Mask = Op.getOperand(5);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = Src1;
// Set PassThru element.
if (IntrData->Type == TERLOG_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Src4),
Mask, PassThru, Subtarget, DAG);
}
case CVTPD2PS:
// ISD::FP_ROUND has a second argument that indicates if the truncation
// does not change the value. Set it to 0 since it can change.
return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
DAG.getIntPtrConstant(0, dl));
case CVTPD2PS_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// We add rounding mode to the Node when
// - RM Opcode is specified and
// - RM is not "current direction".
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src, Rnd),
Mask, PassThru, Subtarget, DAG);
}
}
assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!");
// ISD::FP_ROUND has a second argument that indicates if the truncation
// does not change the value. Set it to 0 since it can change.
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
DAG.getIntPtrConstant(0, dl)),
Mask, PassThru, Subtarget, DAG);
}
case FPCLASS: {
// FPclass intrinsics with mask
SDValue Src1 = Op.getOperand(1);
MVT VT = Src1.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask,
DAG.getTargetConstant(0, dl, MaskVT),
Subtarget, DAG);
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), FPclassMask,
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);
SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, FPclassMask,
DAG.getIntPtrConstant(0, dl));
}
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.
// Example of transformation:
// (i8 (int_x86_avx512_mask_pcmpeq_q_128
// (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
// (i8 (bitcast
// (v8i1 (insert_subvector undef,
// (v2i1 (and (PCMPEQM %a, %b),
// (extract_subvector
// (v8i1 (bitcast %mask)), 0))), 0))))
MVT VT = Op.getOperand(1).getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
SDValue CC = Op.getOperand(3);
CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have a non-default rounding mode
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC, Rnd);
}
// Default rounding mode.
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC);
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
MaskVT),
Subtarget, DAG);
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CmpMask,
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
SDValue Mask = Op.getOperand(4);
SDValue Cmp;
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
if (!isRoundModeCurDirection(Rnd))
Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
}
// Default rounding mode.
if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, dl,
MVT::i1),
Subtarget, DAG);
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, CmpMask,
DAG.getIntPtrConstant(0, dl));
}
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
SDValue InvComi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, RHS, LHS);
SDValue SetCC;
switch (CC) {
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
case ISD::SETNE: { // (ZF = 1 or PF = 1)
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
break;
}
case ISD::SETGT: // (CF = 0 and ZF = 0)
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
SetCC = getSETCC(X86::COND_A, InvComi, dl, DAG);
break;
}
case ISD::SETGE: // CF = 0
SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
break;
case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
SetCC = getSETCC(X86::COND_AE, InvComi, dl, DAG);
break;
default:
llvm_unreachable("Unexpected illegal condition!");
}
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case COMI_RM: { // Comparison intrinsics with Sae
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned CondVal = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
SDValue Sae = Op.getOperand(4);
SDValue FCmp;
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8));
else
FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i32, FCmp,
DAG.getIntPtrConstant(0, dl));
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), Subtarget,
DAG);
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
if (isAllOnesConstant(Mask)) // return data as is
return Op.getOperand(1);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
DataToCompress),
Mask, PassThru, Subtarget, DAG);
}
case BROADCASTM: {
SDValue Mask = Op.getOperand(1);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Mask.getSimpleValueType().getSizeInBits());
Mask = DAG.getBitcast(MaskVT, Mask);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
}
case KUNPCK: {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);
SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
// Arguments should be swapped.
SDValue Res = DAG.getNode(IntrData->Opc0, dl,
MVT::getVectorVT(MVT::i1, VT.getSizeInBits()),
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
case MASK_BINOP: {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
SDValue Res = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Src2);
return DAG.getBitcast(VT, Res);
}
case FIXUPIMMS:
case FIXUPIMMS_MASKZ:
case FIXUPIMM:
case FIXUPIMM_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Imm = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS) ?
Src1 : getZeroVector(VT, Subtarget, DAG, dl);
// We specify 2 possible modes for intrinsics, with/without rounding
// modes.
// First, we check if the intrinsic has a rounding mode (7 operands);
// if not, we set the rounding mode to "current".
SDValue Rnd;
if (Op.getNumOperands() == 7)
Rnd = Op.getOperand(6);
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ)
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Imm, Rnd),
Mask, Passthru, Subtarget, DAG);
else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Imm, Rnd),
Mask, Passthru, Subtarget, DAG);
}
case CONVERT_TO_MASK: {
MVT SrcVT = Op.getOperand(1).getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT,
Op.getOperand(1));
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CvtMask,
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
case BRCST_SUBVEC_TO_VEC: {
SDValue Src = Op.getOperand(1);
SDValue Passthru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
EVT resVT = Passthru.getValueType();
SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT,
DAG.getUNDEF(resVT), Src,
DAG.getIntPtrConstant(0, dl));
SDValue immVal;
if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector())
immVal = DAG.getConstant(0x44, dl, MVT::i8);
else
immVal = DAG.getConstant(0, dl, MVT::i8);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
subVec, subVec, immVal),
Mask, Passthru, Subtarget, DAG);
}
case BRCST32x2_TO_VEC: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
assert((VT.getScalarType() == MVT::i32 ||
VT.getScalarType() == MVT::f32) && "Unexpected type!");
// Bitcast Src to a packed 64-bit element type.
MVT ScalarVT = VT.getScalarType() == MVT::i32 ? MVT::i64 : MVT::f64;
MVT BitcastVT = MVT::getVectorVT(ScalarVT, Src.getValueSizeInBits()/64);
Src = DAG.getBitcast(BitcastVT, Src);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
Mask, PassThru, Subtarget, DAG);
}
default:
break;
}
}
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
// but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
// ptest and testp intrinsics. The intrinsics these come from are designed
// to return an integer value, not just an instruction, so lower them to the
// ptest or testp pattern and a setcc for the result.
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestz_256:
case Intrinsic::x86_avx_ptestc_256:
case Intrinsic::x86_avx_ptestnzc_256:
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
IsTestPacked = true;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
IsTestPacked = true;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
IsTestPacked = true;
LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
}
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc_w: {
X86::CondCode X86CC =
(IntNo == Intrinsic::x86_avx512_kortestz_w) ? X86::COND_E : X86::COND_B;
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_knot_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1);
SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_avx512_kandn_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
// Invert LHS for the not.
LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS,
DAG.getConstant(1, dl, MVT::v16i1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS);
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_avx512_kxnor_w: {
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
// Invert result for the not.
Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res,
DAG.getConstant(1, dl, MVT::v16i1));
return DAG.getBitcast(MVT::i16, Res);
}
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
case Intrinsic::x86_sse42_pcmpestric128:
case Intrinsic::x86_sse42_pcmpistrio128:
case Intrinsic::x86_sse42_pcmpestrio128:
case Intrinsic::x86_sse42_pcmpistris128:
case Intrinsic::x86_sse42_pcmpestris128:
case Intrinsic::x86_sse42_pcmpistriz128:
case Intrinsic::x86_sse42_pcmpestriz128: {
unsigned Opcode;
X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpestria128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpistric128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpestric128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpistrio128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpestrio128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpistris128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpestris128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpistriz128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_sse42_pcmpestriz128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_E;
break;
}
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
SDValue SetCC = getSETCC(X86CC, SDValue(PCMP.getNode(), 1), dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
Opcode = X86ISD::PCMPISTRI;
else
Opcode = X86ISD::PCMPESTRI;
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
auto &Context = MF.getMMI().getContext();
MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
Twine(MF.getFunctionNumber()));
return DAG.getNode(X86ISD::Wrapper, dl, VT, DAG.getMCSymbol(S, PtrVT));
}
case Intrinsic::x86_seh_lsda: {
// Compute the symbol for the LSDA. We know it'll get emitted later.
MachineFunction &MF = DAG.getMachineFunction();
SDValue Op1 = Op.getOperand(1);
auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()));
// Generate a simple absolute symbol reference. This intrinsic is only
// supported on 32-bit Windows, which isn't PIC.
SDValue Result = DAG.getMCSymbol(LSDASym, VT);
return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
}
case Intrinsic::x86_seh_recoverfp: {
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.x86.seh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
case Intrinsic::localaddress: {
// Returns one of the stack, base, or frame pointer registers, depending on
// which is used to reference local variables.
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned Reg;
if (RegInfo->hasBasePointer(MF))
Reg = RegInfo->getBaseRegister();
else // This function handles the SP or FP case.
Reg = RegInfo->getPtrSizedFrameRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
}
}
static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = Mask.getValueType();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let ExecutionDepsFix deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
}
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let ExecutionDepsFix deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, VMask, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Mask, SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
// Scale must be constant.
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
/// Handles the lowering of builtin intrinsics that return the value
/// of the extended control register.
static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LO, HI;
// The ECX register is used to select the index of the XCR register to
// return.
SDValue Chain =
DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2));
SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain);
Chain = SDValue(N1, 0);
// Reads the content of XCR and returns it in registers EDX:EAX.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
Chain = HI.getValue(1);
if (Subtarget.is64Bit()) {
// Merge the two 32-bit values into a 64-bit one.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
/// Handles the lowering of builtin intrinsics that read performance monitor
/// counters (x86_rdpmc).
static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LO, HI;
// The ECX register is used to select the index of the performance counter
// to read.
SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
N->getOperand(2));
SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
// Reads the content of a 64-bit performance counter and returns it in the
// registers EDX:EAX.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
Chain = HI.getValue(1);
if (Subtarget.is64Bit()) {
// The EAX register is loaded with the low-order 32 bits. The EDX register
// is loaded with the supported high-order bits of the counter.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
/// Handles the lowering of builtin intrinsics that read the time stamp counter
/// (x86_rdtsc and x86_rdtscp). This function is also used to custom lower
/// READCYCLECOUNTER nodes.
static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
SDValue LO, HI;
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
LO.getValue(2));
} else {
LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
LO.getValue(2));
}
SDValue Chain = HI.getValue(1);
if (Opcode == X86ISD::RDTSCP_DAG) {
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
// Instruction RDTSCP loads the IA32_TSC_AUX MSR (address C000_0103H) into
// the ECX register. Add 'ecx' explicitly to the chain.
SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
HI.getValue(2));
// Explicitly store the content of ECX at the location passed as input
// to the 'rdtscp' intrinsic.
Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
MachinePointerInfo());
}
if (Subtarget.is64Bit()) {
// The EDX register is loaded with the high-order 32 bits of the MSR, and
// the EAX register is loaded with the low-order 32 bits.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
return;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { LO, HI };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<SDValue, 2> Results;
SDLoc DL(Op);
getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, DL);
}
static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue RegNode = Op.getOperand(2);
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
if (!EHInfo)
report_fatal_error("EH registrations only live in functions using WinEH");
// Cast the operand to an alloca, and remember the frame index.
auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
if (!FINode)
report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca");
EHInfo->EHRegNodeFrameIndex = FINode->getIndex();
// Return the chain operand without making any DAG nodes.
return Chain;
}
static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue EHGuard = Op.getOperand(2);
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
if (!EHInfo)
report_fatal_error("EHGuard only live in functions using WinEH");
// Cast the operand to an alloca, and remember the frame index.
auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard);
if (!FINode)
report_fatal_error("llvm.x86.seh.ehguard expects a static alloca");
EHInfo->EHGuardFrameIndex = FINode->getIndex();
// Return the chain operand without making any DAG nodes.
return Chain;
}
/// Emit Truncating Store with signed or unsigned saturation.
static SDValue
EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
return SignedSat ?
DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
static SDValue
EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Val };
return SignedSat ?
DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
switch (IntNo) {
case llvm::Intrinsic::x86_seh_ehregnode:
return MarkEHRegistrationNode(Op, DAG);
case llvm::Intrinsic::x86_seh_ehguard:
return MarkEHGuard(Op, DAG);
case llvm::Intrinsic::x86_flags_read_u32:
case llvm::Intrinsic::x86_flags_read_u64:
case llvm::Intrinsic::x86_flags_write_u32:
case llvm::Intrinsic::x86_flags_write_u64: {
// We need a frame pointer because this will get lowered to a PUSH/POP
// sequence.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCopyImplyingStackAdjustment(true);
// Don't do anything here; we will expand these intrinsics out later
// during ExpandISelPseudos in EmitInstrWithCustomInserter.
return SDValue();
}
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64: {
SDLoc dl(Op);
SDValue Chain = Op->getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue LwpIns =
DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2),
Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG);
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
LwpIns.getValue(1));
}
}
return SDValue();
}
SDLoc dl(Op);
switch(IntrData->Type) {
default: llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, cast to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, dl, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, dl, MVT::i32),
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
Ops);
// Return { result, isValid, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
case GATHER_AVX2: {
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case GATHER: {
// gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
Chain, Subtarget);
}
case SCATTER: {
// scatter(base, mask, index, v1, scale);
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
Scale, Chain, Subtarget);
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
assert((HintVal == 2 || HintVal == 3) &&
"Wrong prefetch hint in intrinsic: should be 2 or 3");
unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
SDValue Base = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
Subtarget);
}
// Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
case RDTSC: {
SmallVector<SDValue, 2> Results;
getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
case RDPMC: {
SmallVector<SDValue, 2> Results;
getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// Get Extended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
// ADC/ADCX/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32);
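// Adding all-ones (-1) to the incoming carry operand sets CF exactly when
// that operand is nonzero; this materializes the carry-in flag in EFLAGS
// for the ADC/ADCX/SBB node emitted below.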
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
DAG.getConstant(-1, dl, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
Op.getOperand(4), GenCF.getValue(1));
SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
Op.getOperand(5), MachinePointerInfo());
SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
SDValue Results[] = { SetCC, Store };
return DAG.getMergeValues(Results, dl);
}
case COMPRESS_TO_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue DataToCompress = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MVT VT = DataToCompress.getSimpleValueType();
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
if (isAllOnesConstant(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MemIntr->getMemOperand());
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
MemIntr->getMemOperand(),
false /* truncating */, true /* compressing */);
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
SDValue Mask = Op.getOperand(4);
SDValue DataToTruncate = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
EVT MemVT = MemIntr->getMemoryVT();
uint16_t TruncationOp = IntrData->Opc0;
switch (TruncationOp) {
case X86ISD::VTRUNC: {
if (isAllOnesConstant(Mask)) // return just a truncate store
return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
MemIntr->getMemOperand());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
MemIntr->getMemOperand(), true /* truncating */);
}
case X86ISD::VTRUNCUS:
case X86ISD::VTRUNCS: {
bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
if (isAllOnesConstant(Mask))
return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
MemIntr->getMemOperand(), DAG);
MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
VMask, MemVT, MemIntr->getMemOperand(), DAG);
}
default:
llvm_unreachable("Unsupported truncstore intrinsic");
}
}
case EXPAND_FROM_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
MVT VT = Op.getSimpleValueType();
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
if (isAllOnesConstant(Mask)) // Return a regular (unmasked) vector load.
return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
if (X86::isZeroNode(Mask))
return DAG.getUNDEF(VT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
MemIntr->getMemOperand(), ISD::NON_EXTLOAD,
true /* expanding */);
}
}
}
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo());
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
MachinePointerInfo());
}
SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true);
return getReturnAddressFrameIndex(DAG);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
EVT VT = Op.getValueType();
MFI.setFrameAddressIsTaken(true);
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
// Depth > 0 makes no sense on targets which use Windows unwind codes. It
// is not possible to crawl up the stack without looking at the unwind codes
// simultaneously.
int FrameAddrIndex = FuncInfo->getFAIndex();
if (!FrameAddrIndex) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
FuncInfo->setFAIndex(FrameAddrIndex);
}
return DAG.getFrameIndex(FrameAddrIndex, VT);
}
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
const MachineFunction &MF = DAG.getMachineFunction();
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
.Case("ebp", X86::EBP)
.Case("rbp", X86::RBP)
.Default(0);
if (Reg == X86::EBP || Reg == X86::RBP) {
if (!TFI.hasFP(MF))
report_fatal_error("register " + StringRef(RegName) +
" is allocatable: function has no frame pointer");
#ifndef NDEBUG
else {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
"Invalid Frame Register!");
}
#endif
}
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
unsigned X86TargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX;
}
unsigned X86TargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
// Funclet personalities don't use selectors (the runtime does the selection).
assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
}
bool X86TargetLowering::needsFixedCatchObjects() const {
return Subtarget.isTargetWin64();
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc dl (Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
"Invalid Frame Register!");
SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
DAG.getIntPtrConstant(RegInfo->getSlotSize(),
dl));
StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
DAG.getRegister(StoreAddrReg, PtrVT));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
// If the subtarget is not 64-bit, we may need the global base reg
// after isel pseudo expansion, i.e., after the CGBR pass has run.
// Therefore, ask for the GlobalBaseReg now, so that the pass
// inserts the code for us in case we need it.
// Otherwise, we would end up referencing a virtual register
// that is never defined!
if (!Subtarget.is64Bit()) {
const X86InstrInfo *TII = Subtarget.getInstrInfo();
(void)TII->getGlobalBaseReg(&DAG.getMachineFunction());
}
return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
Op.getOperand(0));
}
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
return Op.getOperand(0);
}
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (Subtarget.is64Bit()) {
SDValue OutChains[6];
// Large code-model.
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
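// The 23-byte trampoline emitted below is laid out as follows
// (little-endian byte offsets):
//   0: 49 BB <FPtr:imm64>   movabsq $FPtr, %r11
//  10: 49 BA <Nest:imm64>   movabsq $Nest, %r10
//  20: 49 FF E3             jmpq   *%r11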
// Load the pointer to the nested function into R11.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, dl, MVT::i64));
OutChains[1] =
DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2),
/* Alignment = */ 2);
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, dl, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 10));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, dl, MVT::i64));
OutChains[3] =
DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12),
/* Alignment = */ 2);
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, dl, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 20));
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, dl, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8),
Addr, MachinePointerInfo(TrmpAddr, 22));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::ECX;
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
const AttributeList &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
auto &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
}
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
" parameters!");
}
}
break;
}
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
NestReg = X86::EAX;
break;
}
SDValue OutChains[4];
SDValue Addr, Disp;
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(10, dl, MVT::i32));
Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
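// The 10-byte 32-bit trampoline is: B8+reg <Nest:imm32> (movl $Nest, %reg)
// followed by E9 <rel32> (jmp FPtr). Disp is the jump displacement relative
// to the end of the trampoline (Trmp + 10), as required by the E9 encoding.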
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] =
DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, dl, MVT::i32));
OutChains[1] =
DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1),
/* Alignment = */ 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, dl, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8),
Addr, MachinePointerInfo(TrmpAddr, 5),
/* Alignment = */ 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, dl, MVT::i32));
OutChains[3] =
DAG.getStore(Root, dl, Disp, Addr, MachinePointerInfo(TrmpAddr, 6),
/* Alignment = */ 1);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
/*
The rounding mode is in bits 11:10 of FPSR, and has the following
settings:
00 Round to nearest
01 Round to -inf
10 Round to +inf
11 Round to 0
FLT_ROUNDS, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf
To perform the conversion, we do:
(((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
*/
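// For example, FPSR RC bits 11:10 = 10 (round to +inf):
// ((FPSR & 0x800) >> 11) = 1 and ((FPSR & 0x400) >> 9) = 0,
// so ((1 | 0) + 1) & 3 = 2, which is FLT_ROUNDS "Round to +inf".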
MachineFunction &MF = DAG.getMachineFunction();
const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo().CreateStackObject(2, StackAlignment, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
Ops, MVT::i16, MMO);
// Load FP Control Word from stack slot
SDValue CWD =
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x800, DL, MVT::i16)),
DAG.getConstant(11, DL, MVT::i8));
SDValue CWD2 =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x400, DL, MVT::i16)),
DAG.getConstant(9, DL, MVT::i8));
SDValue RetVal =
DAG.getNode(ISD::AND, DL, MVT::i16,
DAG.getNode(ISD::ADD, DL, MVT::i16,
DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
DAG.getConstant(1, DL, MVT::i16)),
DAG.getConstant(3, DL, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
// Split a unary integer op into 2 half-sized ops.
static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
unsigned SizeInBits = VT.getSizeInBits();
// Extract the Lo/Hi vectors
SDLoc dl(Op);
SDValue Src = Op.getOperand(0);
SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
DAG.getNode(Op.getOpcode(), dl, NewVT, Hi));
}
// Decompose 256-bit ops into smaller 128-bit ops.
static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return LowerVectorIntUnary(Op, DAG);
}
// Decompose 512-bit ops into smaller 256-bit ops.
static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is512BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 512-bit vector integer operation");
return LowerVectorIntUnary(Op, DAG);
}
/// \brief Lower a vector CTLZ using the natively supported vector CTLZ instruction.
//
// i8/i16 vectors are implemented using the dword LZCNT vector instruction
// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
// split the vector, perform the operation on its Lo and Hi parts, and
// concatenate the results.
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::CTLZ);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
"Unsupported element type");
// Split the vector; its Lo and Hi parts will be handled in the next iteration.
if (16 < NumElems)
return LowerVectorIntUnary(Op, DAG);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
"Unsupported value type for operation");
// Use the natively supported vector instruction vplzcntd.
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
}
// Lower CTLZ using a PSHUFB lookup table implementation.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
int NumElts = VT.getVectorNumElements();
int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);
MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes);
// Per-nibble leading zero PSHUFB lookup table.
const int LUT[16] = {/* 0 */ 4, /* 1 */ 3, /* 2 */ 2, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 1, /* 6 */ 1, /* 7 */ 1,
/* 8 */ 0, /* 9 */ 0, /* a */ 0, /* b */ 0,
/* c */ 0, /* d */ 0, /* e */ 0, /* f */ 0};
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumBytes; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);
// Begin by bitcasting the input to a byte vector, then split those bytes
// into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of them.
// If the hi input nibble is zero then we add both results together, otherwise
// we just take the hi result (by masking the lo result to zero before the
// add).
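// For example, for the byte 0x1C the hi nibble is 1, so HiZ is false and
// the result is LUT[1] = 3; for 0x05 the hi nibble is 0, so the result is
// LUT[0] + LUT[5] = 4 + 1 = 5 leading zeros.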
SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
SDValue Zero = getZeroVector(CurrVT, Subtarget, DAG, DL);
SDValue NibbleMask = DAG.getConstant(0xF, DL, CurrVT);
SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
SDValue Lo = DAG.getNode(ISD::AND, DL, CurrVT, Op0, NibbleMask);
SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
SDValue HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ);
Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo);
Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi);
Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ);
SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi);
// Merge the result from vXi8 back to VT, working on the lo/hi halves
// of the current vector width in the same way we did for the nibbles.
// If the upper half of the input element is zero then add the halves'
// leading zero counts together, otherwise just use the upper half's.
// Double the width of the result until we are at target width.
while (CurrVT != VT) {
int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
int CurrNumElts = CurrVT.getVectorNumElements();
MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2);
MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2);
SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT);
// Check if the upper half of the input element is zero.
SDValue HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0),
DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
HiZ = DAG.getBitcast(NextVT, HiZ);
// Move the upper/lower halves to the lower bits as we'll be extending to
// NextVT. Mask the lower result to zero if HiZ is true and add the results
// together.
SDValue ResNext = Res = DAG.getBitcast(NextVT, Res);
SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift);
SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift);
R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1);
Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1);
CurrVT = NextVT;
}
return Res;
}
static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (Subtarget.hasCDI())
return LowerVectorCTLZ_AVX512CDI(Op, DAG);
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
}
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
unsigned Opc = Op.getOpcode();
if (VT.isVector())
return LowerVectorCTLZ(Op, dl, Subtarget, DAG);
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is no i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
// Issue a bsr (scan bits in reverse) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
if (Opc == ISD::CTLZ) {
// If src is zero (i.e. bsr sets ZF), returns NumBits.
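// Select 2*NumBits-1 here so that the XOR with NumBits-1 below maps the
// all-zero-input case to NumBits (e.g. for i32: 63 ^ 31 == 32).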
SDValue Ops[] = {
Op,
DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
DAG.getConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)
};
Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
}
// Finally xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op,
DAG.getConstant(NumBits - 1, dl, OpVT));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
return Op;
}
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getScalarSizeInBits();
SDLoc dl(Op);
if (VT.isVector()) {
SDValue N0 = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, dl, VT);
// lsb(x) = (x & -x)
SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
// cttz_undef(x) = (width - 1) - ctlz(lsb)
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
DAG.getNode(ISD::CTLZ, dl, VT, LSB));
}
// cttz(x) = ctpop(lsb - 1)
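// For example, x = 0b01100: lsb = 0b00100, and
// ctpop(lsb - 1) = ctpop(0b00011) = 2 == cttz(x).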
SDValue One = DAG.getConstant(1, dl, VT);
return DAG.getNode(ISD::CTPOP, dl, VT,
DAG.getNode(ISD::SUB, dl, VT, LSB, One));
}
assert(Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
DAG.getConstant(NumBits, dl, VT),
DAG.getConstant(X86::COND_E, dl, MVT::i8),
Op.getValue(1)
};
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
}
/// Break a 256-bit integer operation into two new 128-bit ones and then
/// concatenate the result back.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = extract128BitVector(LHS, NumElems / 2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = extract128BitVector(RHS, NumElems / 2, DAG, dl);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
/// Break a 512-bit integer operation into two new 256-bit ones and then
/// concatenate the result back.
static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is512BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
unsigned NumElems = VT.getVectorNumElements();
SDLoc dl(Op);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = extract256BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = extract256BitVector(LHS, NumElems / 2, DAG, dl);
// Extract the RHS vectors
SDValue RHS = Op.getOperand(1);
SDValue RHS1 = extract256BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = extract256BitVector(RHS, NumElems / 2, DAG, dl);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
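// For i1 vectors, both add and sub are addition mod 2, i.e. XOR.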
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntUnary(Op, DAG);
}
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntArith(Op, DAG);
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16
// vector pairs, multiply and truncate.
if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
if (Subtarget.hasInt256()) {
// For 512-bit vectors, split into 256-bit vectors to allow the
// sign-extension to occur.
if (VT == MVT::v64i8)
return Lower512IntArith(Op, DAG);
// For 256-bit vectors, split into 128-bit vectors to allow the
// sign-extension to occur. We don't need this on AVX512BW as we can
// safely sign-extend to v32i16.
if (VT == MVT::v32i8 && !Subtarget.hasBWI())
return Lower256IntArith(Op, DAG);
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
return DAG.getNode(
ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::MUL, dl, ExVT,
DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A),
DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B)));
}
assert(VT == MVT::v16i8 &&
"Pre-AVX2 support only supports v16i8 multiplication");
MVT ExVT = MVT::v8i16;
// Extract the lo parts and sign extend to i16
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
ALo = DAG.getSignExtendVectorInReg(A, dl, ExVT);
BLo = DAG.getSignExtendVectorInReg(B, dl, ExVT);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
ALo = DAG.getBitcast(ExVT, ALo);
BLo = DAG.getBitcast(ExVT, BLo);
ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
}
// Extract the hi parts and sign extend to i16
SDValue AHi, BHi;
if (Subtarget.hasSSE41()) {
const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = DAG.getSignExtendVectorInReg(AHi, dl, ExVT);
BHi = DAG.getSignExtendVectorInReg(BHi, dl, ExVT);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = DAG.getBitcast(ExVT, AHi);
BHi = DAG.getBitcast(ExVT, BHi);
AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
}
// Multiply, mask the lo/hi results to their lower 8 bits, and pack
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
"Should not custom lower when pmuldq is available!");
// Extract the odd parts.
static const int UnpackMask[] = { 1, -1, 3, -1 };
SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
// Multiply the even parts.
SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
// Now multiply odd parts.
SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
Evens = DAG.getBitcast(VT, Evens);
Odds = DAG.getBitcast(VT, Odds);
// Merge the two vectors back together with a shuffle. This expands into 2
// shuffles.
static const int ShufMask[] = { 0, 4, 2, 6 };
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
"Only know how to lower V2I64/V4I64/V8I64 multiply");
// 32-bit vector types used for PMULDQ/PMULUDQ.
MVT MulVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
// PMULDQ returns the 64-bit result of the signed multiplication of the lower
// 32 bits. We can lower with this if the sign bits stretch that far.
if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(A) > 32 &&
DAG.ComputeNumSignBits(B) > 32) {
return DAG.getNode(X86ISD::PMULDQ, dl, VT, DAG.getBitcast(MulVT, A),
DAG.getBitcast(MulVT, B));
}
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
// AloBlo = pmuludq(a, b);
// AloBhi = pmuludq(a, Bhi);
// AhiBlo = pmuludq(Ahi, b);
//
// Hi = psllqi(AloBhi + AhiBlo, 32);
// return AloBlo + Hi;
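// This is schoolbook multiplication on 32-bit digits: with
// A = Ahi*2^32 + Alo and B = Bhi*2^32 + Blo,
// A*B mod 2^64 = AloBlo + ((AloBhi + AhiBlo) << 32),
// since the AhiBhi term is shifted entirely out of the low 64 bits.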
APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
bool ALoIsZero = DAG.MaskedValueIsZero(A, LowerBitsMask);
bool BLoIsZero = DAG.MaskedValueIsZero(B, LowerBitsMask);
APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
// Bit cast to 32-bit vectors for MULUDQ.
SDValue Alo = DAG.getBitcast(MulVT, A);
SDValue Blo = DAG.getBitcast(MulVT, B);
SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
// Only multiply lo/hi halves that aren't known to be zero.
SDValue AloBlo = Zero;
if (!ALoIsZero && !BLoIsZero)
AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Blo);
SDValue AloBhi = Zero;
if (!ALoIsZero && !BHiIsZero) {
SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
Bhi = DAG.getBitcast(MulVT, Bhi);
AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Bhi);
}
SDValue AhiBlo = Zero;
if (!AHiIsZero && !BLoIsZero) {
SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
Ahi = DAG.getBitcast(MulVT, Ahi);
AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, Blo);
}
SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);
return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntArith(Op, DAG);
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) &&
"Unsupported vector type");
// Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply,
// logical shift down the upper half and pack back to i8.
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
// With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
// and then ashr/lshr the upper bits down to the lower bits before multiply.
unsigned Opcode = Op.getOpcode();
unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA);
unsigned ExSSE41 = (ISD::MULHU == Opcode ? X86ISD::VZEXT : X86ISD::VSEXT);
// AVX2 implementations - extend xmm subvectors to ymm.
if (Subtarget.hasInt256()) {
SDValue Lo = DAG.getIntPtrConstant(0, dl);
SDValue Hi = DAG.getIntPtrConstant(VT.getVectorNumElements() / 2, dl);
if (VT == MVT::v32i8) {
SDValue ALo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, A, Lo);
SDValue BLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, B, Lo);
SDValue AHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, A, Hi);
SDValue BHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i8, B, Hi);
ALo = DAG.getNode(ExSSE41, dl, MVT::v16i16, ALo);
BLo = DAG.getNode(ExSSE41, dl, MVT::v16i16, BLo);
AHi = DAG.getNode(ExSSE41, dl, MVT::v16i16, AHi);
BHi = DAG.getNode(ExSSE41, dl, MVT::v16i16, BHi);
Lo = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
DAG.getNode(ISD::MUL, dl, MVT::v16i16, ALo, BLo),
DAG.getConstant(8, dl, MVT::v16i16));
Hi = DAG.getNode(ISD::SRL, dl, MVT::v16i16,
DAG.getNode(ISD::MUL, dl, MVT::v16i16, AHi, BHi),
DAG.getConstant(8, dl, MVT::v16i16));
// The ymm variant of PACKUS treats the 128-bit lanes separately, so before
// using PACKUS we need to permute the inputs to the correct lo/hi xmm lane.
const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
16, 17, 18, 19, 20, 21, 22, 23};
const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
24, 25, 26, 27, 28, 29, 30, 31};
return DAG.getNode(X86ISD::PACKUS, dl, VT,
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, LoMask),
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
}
SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG);
SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG);
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
DAG.getConstant(8, dl, MVT::v16i16));
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Lo);
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Hi);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
assert(VT == MVT::v16i8 &&
"Pre-AVX2 support only supports v16i8 multiplication");
MVT ExVT = MVT::v8i16;
// Extract the lo parts and zero/sign extend to i16.
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
ALo = getExtendInVec(ExSSE41, dl, ExVT, A, DAG);
BLo = getExtendInVec(ExSSE41, dl, ExVT, B, DAG);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
ALo = DAG.getBitcast(ExVT, ALo);
BLo = DAG.getBitcast(ExVT, BLo);
ALo = DAG.getNode(ExShift, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT));
BLo = DAG.getNode(ExShift, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT));
}
// Extract the hi parts and zero/sign extend to i16.
SDValue AHi, BHi;
if (Subtarget.hasSSE41()) {
const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = getExtendInVec(ExSSE41, dl, ExVT, AHi, DAG);
BHi = getExtendInVec(ExSSE41, dl, ExVT, BHi, DAG);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
AHi = DAG.getBitcast(ExVT, AHi);
BHi = DAG.getBitcast(ExVT, BHi);
AHi = DAG.getNode(ExShift, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT));
BHi = DAG.getNode(ExShift, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT));
}
// Multiply, logically shift the upper 8 bits of the lo/hi results down to
// the lower 8 bits, and pack back to v16i8.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
RLo = DAG.getNode(ISD::SRL, dl, ExVT, RLo, DAG.getConstant(8, dl, ExVT));
RHi = DAG.getNode(ISD::SRL, dl, ExVT, RHi, DAG.getConstant(8, dl, ExVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.isTargetWin64() && "Unexpected target");
EVT VT = Op.getValueType();
assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
"Unexpected return type for lowering");
RTLIB::Libcall LC;
bool isSigned;
switch (Op->getOpcode()) {
default: llvm_unreachable("Unexpected request for libcall!");
case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break;
case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break;
}
SDLoc dl(Op);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
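// Win64 has no native 128-bit integer arguments, so pass each i128 operand
// indirectly: spill it to a 16-byte-aligned stack slot and hand the libcall
// a pointer to that slot. The 128-bit result comes back in XMM0 (as v2i64)
// and is bitcast to VT at the end.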
for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
EVT ArgVT = Op->getOperand(i).getValueType();
assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
"Unexpected argument type for lowering");
SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
Entry.Node = StackPtr;
InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
MachinePointerInfo(), /* Alignment = */ 16);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy, 0);
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(
getLibcallCallingConv(LC),
static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee,
std::move(Args))
.setInRegister()
.setSExtResult(isSigned)
.setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return DAG.getBitcast(VT, CallInfo.first);
}
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
MVT VT = Op0.getSimpleValueType();
SDLoc dl(Op);
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256()) {
unsigned Opcode = Op.getOpcode();
unsigned NumElems = VT.getVectorNumElements();
MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), NumElems / 2);
SDValue Lo0 = extract128BitVector(Op0, 0, DAG, dl);
SDValue Lo1 = extract128BitVector(Op1, 0, DAG, dl);
SDValue Hi0 = extract128BitVector(Op0, NumElems / 2, DAG, dl);
SDValue Hi1 = extract128BitVector(Op1, NumElems / 2, DAG, dl);
SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Lo0, Lo1);
SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Hi0, Hi1);
SDValue Ops[] = {
DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(0), Hi.getValue(0)),
DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(1), Hi.getValue(1))
};
return DAG.getMergeValues(Ops, dl);
}
assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
(VT == MVT::v8i32 && Subtarget.hasInt256()));
// PMULxD operations multiply each even value (starting at 0) of LHS with
// the related value of RHS and produce a widened result.
// E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
//
// In other words, to have all the results, we need to perform two PMULxD:
// 1. one with the even values.
// 2. one with the odd values.
// To achieve #2, we need to place the odd values at an even position.
//
// Place the odd value at an even position (basically, shift all values 1
// step to the left):
const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
// <a|b|c|d> => <b|undef|d|undef>
SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
makeArrayRef(&Mask[0], VT.getVectorNumElements()));
// <e|f|g|h> => <f|undef|h|undef>
SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
makeArrayRef(&Mask[0], VT.getVectorNumElements()));
// Emit two multiplies, one for the lower 2 ints and one for the higher 2
// ints.
MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
unsigned Opcode =
(!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
// PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
// => <2 x i64> <ae|cg>
SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
// PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
// => <2 x i64> <bf|dh>
SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
// Shuffle it back into the right order.
SDValue Highs, Lows;
if (VT == MVT::v8i32) {
const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
} else {
const int HighMask[] = {1, 5, 3, 7};
Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
const int LowMask[] = {0, 4, 2, 6};
Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
}
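// Worked example (illustrative only, v4i32): bitcast to v4i32, Mul1 is
// <lo(ae), hi(ae), lo(cg), hi(cg)> and Mul2 is <lo(bf), hi(bf), lo(dh),
// hi(dh)>, so HighMask {1, 5, 3, 7} yields <hi(ae), hi(bf), hi(cg), hi(dh)>
// and LowMask {0, 4, 2, 6} yields <lo(ae), lo(bf), lo(cg), lo(dh)>.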
// If we have a signed multiply but no PMULDQ, fix up the high parts of
// an unsigned multiply.
if (IsSigned && !Subtarget.hasSSE41()) {
SDValue ShAmt = DAG.getConstant(
31, dl,
DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
}
// The first result of MUL_LOHI is actually the low value, followed by the
// high value.
SDValue Ops[] = {Lows, Highs};
return DAG.getMergeValues(Ops, dl);
}
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget.
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
if (VT.is512BitVector() && Subtarget.hasAVX512() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256());
bool AShift = LShift && (Subtarget.hasAVX512() ||
(VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget.
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
return false;
// vXi16 supported only on AVX-512, BWI
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
if (Subtarget.hasAVX512())
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64;
return (Opcode == ISD::SRA) ? AShift : LShift;
}
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
SDValue Ex = DAG.getBitcast(ExVT, R);
// ashr(R, 63) === cmp_slt(R, 0)
if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
"Unsupported PCMPGT op");
return DAG.getNode(X86ISD::PCMPGT, dl, VT,
getZeroVector(VT, Subtarget, DAG, dl), R);
}
if (ShiftAmt >= 32) {
// Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
SDValue Upper =
getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt - 32, DAG);
if (VT == MVT::v2i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
if (VT == MVT::v4i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{9, 1, 11, 3, 13, 5, 15, 7});
} else {
// SRA upper i32, SHL whole i64 and select lower i32.
SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
ShiftAmt, DAG);
SDValue Lower =
getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
Lower = DAG.getBitcast(ExVT, Lower);
if (VT == MVT::v2i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
if (VT == MVT::v4i64)
Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
{8, 1, 10, 3, 12, 5, 14, 7});
}
return DAG.getBitcast(VT, Ex);
};
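// Illustrative trace (not part of the lowering): for v2i64 with
// ShiftAmt >= 32, Ex views each i64 lane as an <lo, hi> i32 pair; Upper
// holds the per-i32 sign splat and Lower holds hi >> (ShiftAmt - 32), so
// mask {5, 1, 7, 3} rebuilds lane 0 as <Lower[1], Upper[1]>, i.e. the
// 64-bit ashr result.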
// Optimize shl/srl/sra with constant shift amount.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
uint64_t ShiftAmt = ShiftConst->getZExtValue();
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
// i64 SRA needs to be performed as partial shifts.
if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
(Subtarget.hasInt256() && VT == MVT::v4i64)) &&
Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
if (VT == MVT::v16i8 ||
(Subtarget.hasInt256() && VT == MVT::v32i8) ||
VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
// Simple i8 add case
if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
return DAG.getNode(ISD::ADD, dl, VT, R, R);
// ashr(R, 7) === cmp_slt(R, 0)
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
if (VT.is512BitVector()) {
assert(VT == MVT::v64i8 && "Unexpected element type!");
SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
}
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// XOP can shift v16i8 directly instead of as shift v8i16 + mask.
if (VT == MVT::v16i8 && Subtarget.hasXOP())
return SDValue();
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT,
R, ShiftAmt, DAG);
SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
}
if (Op.getOpcode() == ISD::SRA) {
// ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
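// Worked i8 example (illustrative only): R = 0xF0 (-16), ShiftAmt = 4:
// lshr gives 0x0F, Mask = 128 >> 4 = 0x08, xor gives 0x07, and
// 0x07 - 0x08 = 0xFF = -1, matching ashr(-16, 4).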
llvm_unreachable("Unknown shift opcode.");
}
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
// TODO: Replace constant extraction with getTargetConstantBitsFromNode.
if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
(VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
(Subtarget.hasAVX512() && VT == MVT::v8i64))) {
// AVX1 targets may be extracting a 128-bit vector from a 256-bit constant.
unsigned SubVectorScale = 1;
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
SubVectorScale =
Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
Amt = Amt.getOperand(0);
}
// Peek through any splat that was introduced for i64 shift vectorization.
int SplatIndex = -1;
if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
if (SVN->isSplat()) {
SplatIndex = SVN->getSplatIndex();
Amt = Amt.getOperand(0);
assert(SplatIndex < (int)VT.getVectorNumElements() &&
"Splat shuffle referencing second operand");
}
if (Amt.getOpcode() != ISD::BITCAST ||
Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
(SubVectorScale * VT.getVectorNumElements());
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
uint64_t ShiftAmt = 0;
unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
for (unsigned i = 0; i != Ratio; ++i) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
if (!C)
return SDValue();
// 6 == Log2(64)
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
}
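// Worked example (illustrative only): for VT = v2i64 with Amt bitcast from
// the v4i32 build_vector <5, 0, 5, 0>, Ratio = 2 and RatioInLog2 = 1, so
// ShiftAmt = 5 | (0 << 32) = 5; the second <5, 0> group matches below and
// the splat shift amount 5 is accepted.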
// Check remaining shift amounts (if not a splat).
if (SplatIndex < 0) {
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
uint64_t ShAmt = 0;
for (unsigned j = 0; j != Ratio; ++j) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
if (!C)
return SDValue();
// 6 == Log2(64)
ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
}
if (ShAmt != ShiftAmt)
return SDValue();
}
}
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
if (Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
}
return SDValue();
}
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86OpcI = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
unsigned X86OpcV = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHL :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA;
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) {
SDValue BaseShAmt;
MVT EltVT = VT.getVectorElementType();
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Amt)) {
// Check if this build_vector node is doing a splat.
// If so, then set BaseShAmt equal to the splat value.
BaseShAmt = BV->getSplatValue();
if (BaseShAmt && BaseShAmt.isUndef())
BaseShAmt = SDValue();
} else {
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
Amt = Amt.getOperand(0);
ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt);
if (SVN && SVN->isSplat()) {
unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) &&
"Unexpected shuffle index found!");
BaseShAmt = InVec.getOperand(SplatIdx);
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
}
if (!BaseShAmt)
// Avoid introducing an extract element from a shuffle.
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec,
DAG.getIntPtrConstant(SplatIdx, dl));
}
}
if (BaseShAmt.getNode()) {
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget.is64Bit() && VT == MVT::v2i64 &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
std::vector<SDValue> Vals(Ratio);
for (unsigned i = 0; i != Ratio; ++i)
Vals[i] = Amt.getOperand(i);
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
for (unsigned j = 0; j != Ratio; ++j)
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
return SDValue();
}
static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!");
if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
return V;
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
return Op;
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v8i16 || VT == MVT::v16i8)) {
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
}
if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
if (Op.getOpcode() == ISD::SRA)
return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
}
// v2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
// Splat the shift amounts so the scalar shifts above will catch it.
SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
// i64 vector arithmetic shift can be emulated with the transform:
// M = lshr(SIGN_MASK, Amt)
// ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
Op.getOpcode() == ISD::SRA) {
SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
R = DAG.getNode(ISD::XOR, dl, VT, R, M);
R = DAG.getNode(ISD::SUB, dl, VT, R, M);
return R;
}
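// Worked example (illustrative only): R = -16 = 0xFFFF...F0, Amt = 4:
// M = 0x8000... >> 4 = 0x0800..., lshr(R, 4) = 0x0FFF...FF, xor gives
// 0x07FF...FF, and subtracting M yields 0xFFFF...FF = -1 = ashr(-16, 4).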
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
if (ConstantAmt && Op.getOpcode() == ISD::SHL &&
(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16))) {
SmallVector<SDValue, 8> Elts;
MVT SVT = VT.getVectorElementType();
unsigned SVTBits = SVT.getSizeInBits();
APInt One(SVTBits, 1);
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = Amt->getOperand(i);
if (Op->isUndef()) {
Elts.push_back(Op);
continue;
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
uint64_t ShAmt = C.getZExtValue();
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
continue;
}
Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT));
}
SDValue BV = DAG.getBuildVector(VT, dl, Elts);
return DAG.getNode(ISD::MUL, dl, VT, R, BV);
}
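// Worked example (illustrative only): (v4i32 shl X, <1, 2, 3, 4>) becomes
// (v4i32 mul X, <2, 4, 8, 16>); undef or out-of-range amounts simply
// produce undef multiplier lanes.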
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));
Op = DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getConstant(0x3f800000U, dl, VT));
Op = DAG.getBitcast(MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
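// Worked example (illustrative only): for a lane with Amt = 5,
// (5 << 23) + 0x3f800000 = 0x42000000 = 32.0f, and FP_TO_SINT recovers
// 32 = 1 << 5, so the final MUL implements the shift.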
// If possible, lower this shift as a sequence of two shifts by
// constant plus a MOVSS/MOVSD/PBLEND instead of scalarizing it.
// Example:
// (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
//
// Could be rewritten as:
// (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
//
// The advantage is that the two shifts from the example would be
// lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing
// the vector shift into four scalar shifts plus four pairs of vector
// insert/extract.
if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) {
unsigned TargetOpcode = X86ISD::MOVSS;
bool CanBeSimplified;
// The splat value for the first packed shift (the 'X' from the example).
SDValue Amt1 = Amt->getOperand(0);
// The splat value for the second packed shift (the 'Y' from the example).
SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : Amt->getOperand(2);
// See if it is possible to replace this node with a sequence of
// two shifts followed by a MOVSS/MOVSD/PBLEND.
if (VT == MVT::v4i32) {
// Check if it is legal to use a MOVSS.
CanBeSimplified = Amt2 == Amt->getOperand(2) &&
Amt2 == Amt->getOperand(3);
if (!CanBeSimplified) {
// Otherwise, check if we can still simplify this node using a MOVSD.
CanBeSimplified = Amt1 == Amt->getOperand(1) &&
Amt->getOperand(2) == Amt->getOperand(3);
TargetOpcode = X86ISD::MOVSD;
Amt2 = Amt->getOperand(2);
}
} else {
// Do similar checks for the case where the machine value type
// is MVT::v8i16.
CanBeSimplified = Amt1 == Amt->getOperand(1);
for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
CanBeSimplified = Amt2 == Amt->getOperand(i);
if (!CanBeSimplified) {
TargetOpcode = X86ISD::MOVSD;
CanBeSimplified = true;
Amt2 = Amt->getOperand(4);
for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
CanBeSimplified = Amt1 == Amt->getOperand(i);
for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
CanBeSimplified = Amt2 == Amt->getOperand(j);
}
}
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD/PBLEND.
MVT CastVT = MVT::v4i32;
SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
SDValue Splat2 =
DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1);
SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2);
if (TargetOpcode == X86ISD::MOVSD)
return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1,
BitCast2, {0, 1, 6, 7}));
return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1,
BitCast2, {0, 5, 6, 7}));
}
}
// v4i32 Non-Uniform Shifts.
// If the shift amount is constant we can shift each lane using the SSE2
// immediate shifts, else we need to zero-extend each lane to the lower i64
// and shift using the SSE2 variable shifts.
// The separate results can then be blended together.
if (VT == MVT::v4i32) {
unsigned Opc = Op.getOpcode();
SDValue Amt0, Amt1, Amt2, Amt3;
if (ConstantAmt) {
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
} else {
// ISD::SHL is handled above but we include it here for completeness.
switch (Opc) {
default:
llvm_unreachable("Unknown target vector shift node");
case ISD::SHL:
Opc = X86ISD::VSHL;
break;
case ISD::SRL:
Opc = X86ISD::VSRL;
break;
case ISD::SRA:
Opc = X86ISD::VSRA;
break;
}
// The SSE2 shifts use the lower i64 as the same shift amount for
// all lanes and the upper i64 is ignored. These shuffle masks
// optimally zero-extend each lane on SSE2/SSE41/AVX targets.
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
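// E.g. Amt0 = <Amt[0], 0, undef, undef>, whose lower i64 is
// zero_extend(Amt[0]), exactly what VSHL/VSRL/VSRA consume.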
}
SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2);
SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3);
SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
}
// It's worth extending once and using the vXi16/vXi32 shifts for smaller
// types, but without AVX512 the extra overheads to get from vXi8 to vXi32
// make the existing SSE solution better.
if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i8) ||
(Subtarget.hasBWI() && VT == MVT::v32i8)) {
MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (VT.is512BitVector()) {
// On AVX512BW targets we make use of the fact that VSELECT lowers
// to a masked blend which selects bytes based just on the sign bit
// extracted to a mask.
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel);
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
} else if (Subtarget.hasSSE41()) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
}
// On pre-SSE41 targets we test for the sign bit by comparing to
// zero - a negative value will set all bits of the lanes to true
// and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
return DAG.getSelect(dl, SelVT, C, V0, V1);
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
Amt = DAG.getBitcast(ExtVT, Amt);
Amt = DAG.getNode(ISD::SHL, dl, ExtVT, Amt, DAG.getConstant(5, dl, ExtVT));
Amt = DAG.getBitcast(VT, Amt);
if (Op->getOpcode() == ISD::SHL || Op->getOpcode() == ISD::SRL) {
// r = VSELECT(r, shift(r, 4), a);
SDValue M =
DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(VT, Amt, M, R);
return R;
}
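// Illustrative trace (not part of the lowering): for a lane with shift
// amount 5 (0b101), a << 5 puts bit 2 in the sign bit, so the shift-by-4
// is selected; the first a += a exposes bit 1 (clear, keep R); the second
// exposes bit 0 (set, take the shift-by-1), for a total shift of 4 + 1 = 5.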
if (Op->getOpcode() == ISD::SRA) {
// For SRA we need to unpack each byte to the higher byte of an i16 vector
// so we can correctly sign extend. We don't care what happens to the
// lower byte.
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), Amt);
SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), Amt);
SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, DAG.getUNDEF(VT), R);
SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, DAG.getUNDEF(VT), R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
// r = VSELECT(r, shift(r, 4), a);
SDValue MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
DAG.getConstant(4, dl, ExtVT));
SDValue MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
DAG.getConstant(4, dl, ExtVT));
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// a += a
ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 2), a);
MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
DAG.getConstant(2, dl, ExtVT));
MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
DAG.getConstant(2, dl, ExtVT));
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// a += a
ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo);
AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi);
// r = VSELECT(r, shift(r, 1), a);
MLo = DAG.getNode(ShiftOpcode, dl, ExtVT, RLo,
DAG.getConstant(1, dl, ExtVT));
MHi = DAG.getNode(ShiftOpcode, dl, ExtVT, RHi,
DAG.getConstant(1, dl, ExtVT));
RLo = SignBitSelect(ExtVT, ALo, MLo, RLo);
RHi = SignBitSelect(ExtVT, AHi, MHi, RHi);
// Logical shift the result back to the lower byte, leaving a zero upper
// byte, meaning that we can safely pack with PACKUSWB.
RLo =
DAG.getNode(ISD::SRL, dl, ExtVT, RLo, DAG.getConstant(8, dl, ExtVT));
RHi =
DAG.getNode(ISD::SRL, dl, ExtVT, RHi, DAG.getConstant(8, dl, ExtVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Z, R);
SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Z, R);
ALo = DAG.getBitcast(ExtVT, ALo);
AHi = DAG.getBitcast(ExtVT, AHi);
RLo = DAG.getBitcast(ExtVT, RLo);
RHi = DAG.getBitcast(ExtVT, RHi);
SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
if (VT == MVT::v8i16) {
unsigned ShiftOpcode = Op->getOpcode();
// If we have a constant shift amount, the non-SSE41 path is best as
// avoiding bitcasts makes it easier to constant fold and reduce to PBLENDW.
bool UseSSE41 = Subtarget.hasSSE41() &&
!ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
if (UseSSE41) {
MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
V0 = DAG.getBitcast(ExtVT, V0);
V1 = DAG.getBitcast(ExtVT, V1);
Sel = DAG.getBitcast(ExtVT, Sel);
return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1));
}
// On pre-SSE41 targets we splat the sign bit - a negative value will
// set all bits of the lanes to true and VSELECT uses that in
// its OR(AND(V0,C),AND(V1,~C)) lowering.
SDValue C =
DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
return DAG.getSelect(dl, VT, C, V0, V1);
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 12;
if (UseSSE41) {
// On SSE41 targets we need to replicate the shift mask in both
// bytes for PBLENDVB.
Amt = DAG.getNode(
ISD::OR, dl, VT,
DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(4, dl, VT)),
DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT)));
} else {
Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(12, dl, VT));
}
// r = VSELECT(r, shift(r, 8), a);
SDValue M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(8, dl, VT));
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 4), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(4, dl, VT));
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// r = VSELECT(r, shift(r, 2), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(2, dl, VT));
R = SignBitSelect(Amt, M, R);
// a += a
Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);
// return VSELECT(r, shift(r, 1), a);
M = DAG.getNode(ShiftOpcode, dl, VT, R, DAG.getConstant(1, dl, VT));
R = SignBitSelect(Amt, M, R);
return R;
}
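// Illustrative trace (not part of the lowering): for a lane with shift
// amount 9 (0b1001), a << 12 puts bit 3 in the sign bit, selecting the
// shift-by-8; the doublings then expose bits 2 and 1 (clear) and bit 0
// (set), adding the shift-by-1 for a total of 8 + 1 = 9.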
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector())
return Lower256IntArith(Op, DAG);
return SDValue();
}
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if (Subtarget.hasAVX512()) {
// Attempt to rotate by immediate.
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Amt, EltSizeInBits, UndefElts, EltBits)) {
if (!UndefElts && llvm::all_of(EltBits, [EltBits](APInt &V) {
return EltBits[0] == V;
})) {
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
uint64_t RotateAmt = EltBits[0].urem(EltSizeInBits);
return DAG.getNode(Op, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
}
// Else, fall-back on VPROLV/VPRORV.
return Op;
}
assert(VT.isVector() && "Custom lowering only for vector rotates!");
assert(Subtarget.hasXOP() && "XOP support required for vector rotates!");
assert((Opcode == ISD::ROTL) && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right.
// Split 256-bit integers.
if (VT.is256BitVector())
return Lower256IntArith(Op, DAG);
assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
assert(RotateAmt < EltSizeInBits && "Rotation out of range");
return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
}
}
// Use general rotate by variable (per-element).
return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
// looks for this combo and may remove the "setcc" instruction if the "setcc"
// has only one use.
SDNode *N = Op.getNode();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
X86::CondCode Cond;
SDLoc DL(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
if (isOneConstant(RHS)) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
case ISD::UADDO:
BaseOp = X86ISD::ADD;
Cond = X86::COND_B;
break;
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
if (isOneConstant(RHS)) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
case ISD::USUBO:
BaseOp = X86ISD::SUB;
Cond = X86::COND_B;
break;
case ISD::SMULO:
BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
if (N->getValueType(0) == MVT::i8) {
BaseOp = X86ISD::UMUL8;
Cond = X86::COND_O;
break;
}
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
SDValue SetCC = getSETCC(X86::COND_O, SDValue(Sum.getNode(), 2), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC = getSETCC(Cond, SDValue(Sum.getNode(), 1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
/// Returns true if the operand type is exactly twice the native width, and
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
else if (OpWidth == 128)
return Subtarget.hasCmpxchg16b();
else
return false;
}
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return needsCmpXchgNb(SI->getValueOperand()->getType());
}
// Note: this turns large loads into lock cmpxchg8b/16b.
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
auto PTy = cast<PointerType>(LI->getPointerOperandType());
return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
}
AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Xchg:
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
return AtomicExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
}
}
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
// harmful as it introduces an mfence.
if (MemType->getPrimitiveSizeInBits() > NativeWidth)
return nullptr;
auto Builder = IRBuilder<>(AI);
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
auto Ptr = AI->getPointerOperand();
// Before the load we need a fence. Here is an example lifted from
// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
// is required:
// Thread 0:
// x.store(1, relaxed);
// r1 = y.fetch_add(0, release);
// Thread 1:
// y.fetch_add(42, acquire);
// r2 = x.load(relaxed);
// r1 = r2 = 0 is impossible, but becomes possible if the idempotent rmw is
// lowered to just a load without a fence. An mfence flushes the store buffer,
// making the optimization clearly correct.
// FIXME: it is required if isReleaseOrStronger(Order) but it is not clear
// otherwise, we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
if (SSID == SyncScope::SingleThread)
// FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at
// the IR level, so we must wrap it in an intrinsic.
return nullptr;
if (!Subtarget.hasMFence())
// FIXME: it might make sense to use a locked operation here but on a
// different cache-line to prevent cache-line bouncing. In practice it
// is probably a small win, and x86 processors without mfence are rare
// enough that we do not bother.
return nullptr;
Function *MFence =
llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
Builder.CreateCall(MFence, {});
// Finally we can emit the atomic load.
LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
AI->getType()->getPrimitiveSizeInBits());
Loaded->setAtomic(Order, SSID);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
return Loaded;
}
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// The only fence that needs an instruction is a sequentially-consistent
// cross-thread fence.
if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
FenceSSID == SyncScope::System) {
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, dl, MVT::i8), // Scale
DAG.getRegister(0, MVT::i32), // Index
DAG.getTargetConstant(0, dl, MVT::i32), // Disp
DAG.getRegister(0, MVT::i32), // Segment.
Zero,
Chain
};
SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT T = Op.getSimpleValueType();
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
switch(T.SimpleTy) {
default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
case MVT::i64:
assert(Subtarget.is64Bit() && "Node not type legal!");
Reg = X86::RAX; size = 8;
break;
}
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
DAG.getTargetConstant(size, DL, MVT::i8),
cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, T, MMO);
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
return SDValue();
}
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
SDLoc dl(Op);
unsigned NumElts;
MVT SVT;
if (SrcVT.isVector()) {
NumElts = SrcVT.getVectorNumElements();
SVT = SrcVT.getVectorElementType();
// Widen the input vector in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
DAG.getIntPtrConstant(i, dl)));
} else {
assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected source type in LowerBITCAST");
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(0, dl)));
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(1, dl)));
NumElts = 2;
SVT = MVT::i32;
}
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue BV = DAG.getBuildVector(NewVT, dl, Elts);
SDValue ToV2F64 = DAG.getBitcast(MVT::v2f64, BV);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
DAG.getIntPtrConstant(0, dl));
}
assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() &&
Subtarget.hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
"Unexpected custom BITCAST");
// i64 <=> MMX conversions are Legal.
if (SrcVT==MVT::i64 && DstVT.isVector())
return Op;
if (DstVT==MVT::i64 && SrcVT.isVector())
return Op;
// MMX <=> MMX conversions are Legal.
if (SrcVT.isVector() && DstVT.isVector())
return Op;
// All other conversions need to be expanded.
return SDValue();
}
/// Compute the horizontal sum of bytes in V for the elements of VT.
///
/// Requires V to be a byte vector and VT to be an integer vector type with
/// wider elements than V's type. The width of the elements of VT determines
/// how many bytes of V are summed horizontally to produce each element of the
/// result.
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(V);
MVT ByteVecVT = V.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
"Expected value to have byte element type.");
assert(EltVT != MVT::i8 &&
"Horizontal byte sum only makes sense for wider elements!");
unsigned VecSize = VT.getSizeInBits();
assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");
// The PSADBW instruction horizontally adds all bytes and leaves the result in
// i64 chunks, thus directly computing the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
}
if (EltVT == MVT::i32) {
// We unpack the low half and high half into i32s interleaved with zeros so
// that we can use PSADBW to horizontally sum them. The most useful part of
// this is that it lines up the results of two PSADBW instructions to be
// two v2i64 vectors which, when concatenated, are the 4 population counts. We
// can then use PACKUSWB to shrink and concatenate them into a v4i32 again.
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
SDValue V32 = DAG.getBitcast(VT, V);
SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, High), Zeros);
// Merge them together.
MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
DAG.getBitcast(ShortVecVT, Low),
DAG.getBitcast(ShortVecVT, High));
return DAG.getBitcast(VT, V);
}
// The only element type left is i16.
assert(EltVT == MVT::i16 && "Unknown how to handle type");
// To obtain pop count for each i16 element starting from the pop count for
// i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s
// right by 8. It is important to shift as i16s as i8 vector shift isn't
// directly supported.
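// Worked example (illustrative only): for the input i16 0x0F0F the byte
// counts in V are 0x0404; shl 8 gives 0x0400, the i8 add gives 0x0804
// (high byte 4 + 4 = 8), and srl 8 yields 0x0008 = popcount(0x0F0F).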
SDValue ShifterV = DAG.getConstant(8, DL, VT);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
DAG.getBitcast(ByteVecVT, V));
return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
}
static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned VecSize = VT.getSizeInBits();
// Implement a lookup table in register by using an algorithm based on:
// http://wm.ite.pl/articles/sse-popcount.html
//
// The general idea is that every lower byte nibble in the input vector is an
// index into an in-register pre-computed pop count table. We then split up the
// input vector into two new ones: (1) a vector with only the shifted-right
// higher nibbles for each byte and (2) a vector with the lower nibbles (and
// masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is an
// i8 vector where each element contains the pop count for its input byte.
//
// To obtain the pop count for elements != i8, we follow up with the same
// approach and use additional tricks as described below.
//
const int LUT[16] = {/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4};
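// Worked example (illustrative only): input byte 0xE5 = 0b11100101 splits
// into nibbles 0xE (LUT -> 3) and 0x5 (LUT -> 2); 3 + 2 = 5 = popcount(0xE5).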
int NumByteElts = VecSize / 8;
MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
SDValue In = DAG.getBitcast(ByteVecVT, Op);
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumByteElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getBuildVector(ByteVecVT, DL, LUTVec);
SDValue M0F = DAG.getConstant(0x0F, DL, ByteVecVT);
// High nibbles
SDValue FourV = DAG.getConstant(4, DL, ByteVecVT);
SDValue HighNibbles = DAG.getNode(ISD::SRL, DL, ByteVecVT, In, FourV);
// Low nibbles
SDValue LowNibbles = DAG.getNode(ISD::AND, DL, ByteVecVT, In, M0F);
// The input vector is used as the shuffle mask that indexes elements into the
// LUT. After counting low and high nibbles, add the vector to obtain the
// final pop count per i8 element.
SDValue HighPopCnt =
DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, HighNibbles);
SDValue LowPopCnt =
DAG.getNode(X86ISD::PSHUFB, DL, ByteVecVT, InRegLUT, LowNibbles);
SDValue PopCnt = DAG.getNode(ISD::ADD, DL, ByteVecVT, HighPopCnt, LowPopCnt);
if (EltVT == MVT::i8)
return PopCnt;
return LowerHorizontalByteSum(PopCnt, VT, Subtarget, DAG);
}
static SDValue LowerVectorCTPOPBitmath(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.is128BitVector() &&
"Only 128-bit vector bitmath lowering supported.");
int VecSize = VT.getSizeInBits();
MVT EltVT = VT.getVectorElementType();
int Len = EltVT.getSizeInBits();
// This is the vectorized version of the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// with a minor tweak to use a series of adds + shifts instead of vector
// multiplications. Implemented for all integer vector types. We only use
// this when we don't have SSSE3 which allows a LUT-based lowering that is
// much faster, even faster than using native popcnt instructions.
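// Worked example (illustrative only) for the byte 0xFF: step 1 gives
// 0xFF - 0x55 = 0xAA (each 2-bit field holds 2); step 2 gives
// 0x22 + 0x22 = 0x44 (each nibble holds 4); step 3 gives
// (0x44 + 0x04) & 0x0F = 8 = popcount(0xFF).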
auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
MVT VT = V.getSimpleValueType();
SDValue ShifterV = DAG.getConstant(Shifter, DL, VT);
return DAG.getNode(OpCode, DL, VT, V, ShifterV);
};
auto GetMask = [&](SDValue V, APInt Mask) {
MVT VT = V.getSimpleValueType();
SDValue MaskV = DAG.getConstant(Mask, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, V, MaskV);
};
// We don't want to incur the implicit masks required to SRL vNi8 vectors on
// x86, so set the SRL type to have elements at least i16 wide. This is
// correct because all of our SRLs are followed immediately by a mask anyway
// that handles any bits that sneak into the high bits of the byte elements.
MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);
SDValue V = Op;
// v = v - ((v >> 1) & 0x55555555...)
SDValue Srl =
DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 1));
SDValue And = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x55)));
V = DAG.getNode(ISD::SUB, DL, VT, V, And);
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
SDValue AndLHS = GetMask(V, APInt::getSplat(Len, APInt(8, 0x33)));
Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 2));
SDValue AndRHS = GetMask(Srl, APInt::getSplat(Len, APInt(8, 0x33)));
V = DAG.getNode(ISD::ADD, DL, VT, AndLHS, AndRHS);
// v = (v + (v >> 4)) & 0x0F0F0F0F...
Srl = DAG.getBitcast(VT, GetShift(ISD::SRL, DAG.getBitcast(SrlVT, V), 4));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, V, Srl);
V = GetMask(Add, APInt::getSplat(Len, APInt(8, 0x0F)));
// At this point, V contains the byte-wise population count, and we are
// merely doing a horizontal sum if necessary to get the wider element
// counts.
if (EltVT == MVT::i8)
return V;
return LowerHorizontalByteSum(
DAG.getBitcast(MVT::getVectorVT(MVT::i8, VecSize / 8), V), VT, Subtarget,
DAG);
}
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
// updated cost models in X86TTIImpl::getIntrinsicInstrCost.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
"Unknown CTPOP type to handle");
SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
// TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
if (Subtarget.hasVPOPCNTDQ()) {
if (VT == MVT::v8i16) {
Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0);
Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op);
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op);
}
if (VT == MVT::v16i8 || VT == MVT::v16i16) {
Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0);
Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op);
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op);
}
}
if (!Subtarget.hasSSSE3()) {
// We can't use the fast LUT approach, so fall back on vectorized bitmath.
assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!");
return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
}
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
// Decompose 512-bit ops into smaller 256-bit ops.
if (VT.is512BitVector() && !Subtarget.hasBWI())
return Lower512IntUnary(Op, DAG);
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().isVector() &&
"We only do custom lowering for vector population count.");
return LowerVectorCTPOP(Op, Subtarget, DAG);
}
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
// For scalars, it's still beneficial to transfer to/from the SIMD unit to
// perform the BITREVERSE.
if (!VT.isVector()) {
MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
int NumElts = VT.getVectorNumElements();
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector())
return Lower256IntUnary(Op, DAG);
assert(VT.is128BitVector() &&
"Only 128-bit vector bitreverse lowering supported.");
// VPPERM reverses the bits of a byte with the permute Op (2 << 5), and we
// perform the BSWAP in the shuffle.
// It's best to shuffle using the second operand as this will implicitly allow
// memory folding for multiple vectors.
SmallVector<SDValue, 16> MaskElts;
for (int i = 0; i != NumElts; ++i) {
for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {
int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
int PermuteByte = SourceByte | (2 << 5);
MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8));
}
}
SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
SDValue Res = DAG.getBitcast(MVT::v16i8, In);
Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8),
Res, Mask);
return DAG.getBitcast(VT, Res);
}
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
if (Subtarget.hasXOP())
return LowerBITREVERSE_XOP(Op, DAG);
assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarType() == MVT::i8 &&
"Only byte vector BITREVERSE supported");
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntUnary(Op, DAG);
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
// two nibbles and a PSHUFB lookup to find the bitreverse of each
// 0-15 value (moved to the other nibble).
SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));
const int LoLUT[16] = {
/* 0 */ 0x00, /* 1 */ 0x80, /* 2 */ 0x40, /* 3 */ 0xC0,
/* 4 */ 0x20, /* 5 */ 0xA0, /* 6 */ 0x60, /* 7 */ 0xE0,
/* 8 */ 0x10, /* 9 */ 0x90, /* a */ 0x50, /* b */ 0xD0,
/* c */ 0x30, /* d */ 0xB0, /* e */ 0x70, /* f */ 0xF0};
const int HiLUT[16] = {
/* 0 */ 0x00, /* 1 */ 0x08, /* 2 */ 0x04, /* 3 */ 0x0C,
/* 4 */ 0x02, /* 5 */ 0x0A, /* 6 */ 0x06, /* 7 */ 0x0E,
/* 8 */ 0x01, /* 9 */ 0x09, /* a */ 0x05, /* b */ 0x0D,
/* c */ 0x03, /* d */ 0x0B, /* e */ 0x07, /* f */ 0x0F};
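// Worked example (illustrative only): In = 0x1E = 0b00011110; LoLUT[0xE] =
// 0x70 (reversed low nibble in the high position) and HiLUT[0x1] = 0x08
// (reversed high nibble in the low position), so OR gives 0x78 =
// 0b01111000 = bitreverse(0x1E).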
SmallVector<SDValue, 16> LoMaskElts, HiMaskElts;
for (unsigned i = 0; i < NumElts; ++i) {
LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8));
HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8));
}
SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
NewOpc = X86ISD::LADD;
break;
case ISD::ATOMIC_LOAD_SUB:
NewOpc = X86ISD::LSUB;
break;
case ISD::ATOMIC_LOAD_OR:
NewOpc = X86ISD::LOR;
break;
case ISD::ATOMIC_LOAD_XOR:
NewOpc = X86ISD::LXOR;
break;
case ISD::ATOMIC_LOAD_AND:
NewOpc = X86ISD::LAND;
break;
default:
llvm_unreachable("Unknown ATOMIC_LOAD_ opcode");
}
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
/*MemVT=*/N->getSimpleValueType(0), MMO);
}
/// Lower atomic_load_ops into LOCK-prefixed operations.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Chain = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
unsigned Opc = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
// We can lower atomic_load_add into LXADD. However, any other atomicrmw op
// can only be lowered when the result is unused. They should have already
// been transformed into a cmpxchg loop in AtomicExpand.
if (N->hasAnyUseOfValue(0)) {
// Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
// select LXADD if LOCK_SUB can't be selected.
if (Opc == ISD::ATOMIC_LOAD_SUB) {
AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
RHS, AN->getMemOperand());
}
assert(Opc == ISD::ATOMIC_LOAD_ADD &&
"Used AtomicRMW ops other than Add should have been expanded!");
return N;
}
SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
return SDValue();
}
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
// Convert seq_cst store -> xchg
// Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
// FIXME: On 32-bit, store -> fist or movq would be more efficient
// (The only way to get a 16-byte store is cmpxchg16b)
// FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
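// E.g. a seq_cst "mov [p], reg" would still need a trailing fence, whereas
// "xchg [p], reg" has an implicit LOCK prefix and acts as a full barrier,
// so the swap's returned value can simply be discarded.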
if (cast<AtomicSDNode>(Node)->getOrdering() ==
AtomicOrdering::SequentiallyConsistent ||
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
cast<AtomicSDNode>(Node)->getMemOperand());
return Swap.getValue(1);
}
// Other atomic stores have a simple pattern.
return Op;
}
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDLoc DL(N);
// Set the carry flag.
SDValue Carry = Op.getOperand(2);
EVT CarryVT = Carry.getValueType();
APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getConstant(NegOne, DL, CarryVT));
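// Note: ADD Carry, -1 produces a carry-out iff Carry != 0 (c + 0xFF..FF
// wraps for any c >= 1), so this re-materializes the boolean carry input
// into EFLAGS.CF, where ADC/SBB expect it.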
unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
Op.getOperand(1), Carry.getValue(1));
SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values as { float, float } (in XMM0) or
// { double, double } (which is returned in XMM0, XMM1).
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
bool isF64 = ArgVT == MVT::f64;
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Callee =
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
: (Type *)VectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
// Returned in xmm0 and xmm1.
return CallResult.first;
// Returned in bits 0:31 and 32:63 of xmm0.
SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(0, dl));
SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
CallResult.first, DAG.getIntPtrConstant(1, dl));
SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
/// Widen a vector input to a vector of NVT. The
/// input vector must have the same element type as NVT.
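/// E.g. ExtendToType(<a, b> : v2i32, v4i32) yields <a, b, undef, undef>,
/// or <a, b, 0, 0> when FillWithZeroes is set.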
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
bool FillWithZeroes = false) {
// Check if InOp already has the right width.
MVT InVT = InOp.getSimpleValueType();
if (InVT == NVT)
return InOp;
if (InOp.isUndef())
return DAG.getUNDEF(NVT);
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
unsigned InNumElts = InVT.getVectorNumElements();
unsigned WidenNumElts = NVT.getVectorNumElements();
assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
"Unexpected request for vector widening");
SDLoc dl(InOp);
if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
InOp.getNumOperands() == 2) {
SDValue N1 = InOp.getOperand(1);
if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
N1.isUndef()) {
InOp = InOp.getOperand(0);
InVT = InOp.getSimpleValueType();
InNumElts = InVT.getVectorNumElements();
}
}
if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i < InNumElts; ++i)
Ops.push_back(InOp.getOperand(i));
EVT EltVT = InOp.getOperand(0).getValueType();
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
Ops.push_back(FillVal);
return DAG.getBuildVector(NVT, dl, Ops);
}
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
DAG.getUNDEF(NVT);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
InOp, DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
// An X86 scatter kills the mask register, so its type should be added to
// the list of return values.
// If the "scatter" already has 2 return values, it has been handled.
if (Op.getNode()->getNumValues() == 2)
return Op;
MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
SDValue Src = N->getValue();
MVT VT = Src.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
SDLoc dl(Op);
SDValue NewScatter;
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
SDValue Chain = N->getChain();
SDValue BasePtr = N->getBasePtr();
MVT MemVT = N->getMemoryVT().getSimpleVT();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
// The v2i32 value was promoted to v2i64.
// Now we "redo" the type legalizer's work and widen the original
// v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
// with a shuffle.
assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
"Unexpected memory type");
int ShuffleMask[] = {0, 2, -1, -1};
Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
DAG.getUNDEF(MVT::v4i32), ShuffleMask);
// Now we have 4 elements instead of 2.
// Expand the index.
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
Index = ExtendToType(Index, NewIndexVT, DAG);
// Expand the mask with zeroes. The mask may be <2 x i64> or <2 x i1> at
// this point.
assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
"Unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
VT = MVT::v4i32;
}
unsigned NumElts = VT.getVectorNumElements();
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// AVX512F supports only 512-bit vectors; either the data or the index
// must be 512 bits wide. If both the index and data are currently
// 256-bit but the vector contains 8 elements, we just sign-extend the
// index.
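// E.g. a v4i32 scatter with a v4i32 index on plain AVX512F is widened to
// 8 lanes: index, data, and mask are all extended, with the mask padded
// with zeroes so the four extra lanes are inactive and store nothing.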
if (IndexVT == MVT::v8i32)
// Just extend index
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
else {
// The minimum number of elements in a scatter is 8
NumElts = 8;
// Index
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
// Use original index here, do not modify the index twice
Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
if (IndexVT.getScalarType() == MVT::i32)
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
// Mask
// At this point the mask operand has been promoted
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
// Use the original mask here, do not modify the mask twice
Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
// The value that should be stored
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src = ExtendToType(Src, NewVT, DAG);
}
}
// If the mask is "wide" at this point, truncate it to an i1 vector
MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
// The mask is killed by the scatter; add it to the result values
SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
N->getMemOperand());
DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
return SDValue(NewScatter.getNode(), 1);
}
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
SDLoc dl(Op);
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
"Expanding masked load is supported on AVX-512 target only!");
assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
"Expanding masked load is supported for 32 and 64-bit types only!");
// Non-expanding loads of 4x32, 4x64 and 2x64 vectors are legal regardless
// of VLX; expanding loads of those types are handled below.
if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4)
return Op;
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked load op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked load op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bits
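// E.g. a v8f32 masked load on AVX512F without VLX is widened to v16f32;
// the mask is zero-padded so the extra lanes are inactive, and the low
// 8 lanes are extracted from the wide result afterwards.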
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
SDValue Src0 = N->getSrc0();
Src0 = ExtendToType(Src0, WideDataVT, DAG);
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
"We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
if (MaskEltTy != MVT::i1)
Mask = DAG.getNode(ISD::TRUNCATE, dl,
MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
N->getBasePtr(), Mask, Src0,
N->getMemoryVT(), N->getMemOperand(),
N->getExtensionType(),
N->isExpandingLoad());
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewLoad.getValue(0),
DAG.getIntPtrConstant(0, dl));
SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
SDValue DataToStore = N->getValue();
MVT VT = DataToStore.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
SDLoc dl(Op);
assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&
"Compressing masked store is supported on AVX-512 target only!");
assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
"Compressing masked store is supported for 32 and 64-bit types only!");
// 4x32 and 2x64 vectors of non-compressing stores are legal regardless of VLX.
if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4)
return Op;
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked store op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked store op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bits
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
"We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
if (MaskEltTy != MVT::i1)
Mask = DAG.getNode(ISD::TRUNCATE, dl,
MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
Mask, N->getMemoryVT(), N->getMemOperand(),
N->isTruncatingStore(), N->isCompressingStore());
}
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
SDValue Mask = N->getMask();
SDValue Src0 = N->getValue();
MVT IndexVT = Index.getSimpleValueType();
MVT MaskVT = Mask.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
!Index.getSimpleValueType().is512BitVector()) {
// AVX512F supports only 512-bit vectors; either the data or the index
// must be 512 bits wide. If both the index and data are currently
// 256-bit but the vector contains 8 elements, we just sign-extend the
// index.
if (NumElts == 8) {
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), Index };
DAG.UpdateNodeOperands(N, Ops);
return Op;
}
// The minimum number of elements in a gather is 8
NumElts = 8;
// Index
MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
Index = ExtendToType(Index, NewIndexVT, DAG);
if (IndexVT.getScalarType() == MVT::i32)
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
// Mask
MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
// At this point the mask operand has been promoted
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
// The pass-through value
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src0 = ExtendToType(Src0, NewVT, DAG);
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewGather.getValue(0),
DAG.getIntPtrConstant(0, dl));
SDValue RetOps[] = {Extract, NewGather.getValue(1)};
return DAG.getMergeValues(RetOps, dl);
}
if (N->getMemoryVT() == MVT::v2i32 && Subtarget.hasVLX()) {
// There is a special case when the return type v2i32 is illegal and
// the type legalizer extended it to v2i64. Without this conversion we end up
// with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD.
// In order to avoid this situation, we'll build an X86 specific Gather node
// with index v2i64 and value type v4i32.
assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 &&
"Unexpected type in masked gather");
Src0 = DAG.getVectorShuffle(MVT::v4i32, dl,
DAG.getBitcast(MVT::v4i32, Src0),
DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 });
// The mask should match the destination type. Extending the mask with
// zeroes is not necessary since the instruction itself reads only two
// values from memory.
Mask = ExtendToType(Mask, MVT::v4i1, DAG, false);
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4i32, MVT::Other), Ops, dl, N->getMemoryVT(),
N->getMemOperand());
SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64,
NewGather.getValue(0), DAG);
SDValue RetOps[] = { Sext, NewGather.getValue(1) };
return DAG.getMergeValues(RetOps, dl);
}
if (N->getMemoryVT() == MVT::v2f32 && Subtarget.hasVLX()) {
// This transformation is for optimization only.
// The type legalizer extended the mask and index to 4-element vectors
// to match the requirements of the common gather node: the same vector
// width for index and value. The X86 gather node tolerates mismatched
// vector widths so that a more optimal instruction can be selected in
// the end.
assert(VT == MVT::v4f32 && Src0.getValueType() == MVT::v4f32 &&
"Unexpected type in masked gather");
if (Mask.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorAllZeros(Mask.getOperand(1).getNode()) &&
Index.getOpcode() == ISD::CONCAT_VECTORS &&
Index.getOperand(1).isUndef()) {
Mask = ExtendToType(Mask.getOperand(0), MVT::v4i1, DAG, false);
Index = Index.getOperand(0);
} else
return Op;
SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
SDValue NewGather = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
DAG.getVTList(MVT::v4f32, MVT::Other), Ops, dl, N->getMemoryVT(),
N->getMemOperand());
SDValue RetOps[] = { NewGather.getValue(0), NewGather.getValue(1) };
return DAG.getMergeValues(RetOps, dl);
}
return Op;
}
SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op,
SelectionDAG &DAG) const {
// TODO: Eventually, the lowering of these nodes should be informed by or
// deferred to the GC strategy for the function in which they appear. For
// now, however, they must be lowered to something. Since they are logically
// no-ops in the case of a null GC strategy (or a GC strategy which does not
// require special handling for these nodes), lower them as literal NOOPs for
// the time being.
SmallVector<SDValue, 2> Ops;
Ops.push_back(Op.getOperand(0));
if (Op->getGluedNode())
Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
SDLoc OpDL(Op);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
return NOOP;
}
SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
SelectionDAG &DAG) const {
// TODO: Eventually, the lowering of these nodes should be informed by or
// deferred to the GC strategy for the function in which they appear. For
// now, however, they must be lowered to something. Since they are logically
// no-ops in the case of a null GC strategy (or a GC strategy which does not
// require special handling for these nodes), lower them as literal NOOPs for
// the time being.
SmallVector<SDValue, 2> Ops;
Ops.push_back(Op.getOperand(0));
if (Op->getGluedNode())
Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));
SDLoc OpDL(Op);
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0);
return NOOP;
}
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG);
case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::EH_SJLJ_SETUP_DISPATCH:
return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
case ISD::UMUL_LOHI:
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
return LowerGC_TRANSITION_START(Op, DAG);
case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG);
case ISD::STORE: return LowerTruncatingStore(Op, Subtarget, DAG);
}
}
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void X86TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (!Res.getNode())
return;
assert((N->getNumValues() <= Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
// Place new result values based on N's result numbering.
// In some cases (LowerSINT_TO_FP, for example) Res has more result values
// than the original node; the chain (the last value) should be dropped.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
/// Replace a node with an illegal result type with a new node built out of
/// custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case X86ISD::AVG: {
// Legalize types for X86ISD::AVG by expanding vectors.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
auto InVT = N->getValueType(0);
auto InVTSize = InVT.getSizeInBits();
const unsigned RegSize =
(InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
assert((Subtarget.hasBWI() || RegSize < 512) &&
"512-bit vector requires AVX512BW");
assert((Subtarget.hasAVX2() || RegSize < 256) &&
"256-bit vector requires AVX2");
auto ElemVT = InVT.getVectorElementType();
auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
RegSize / ElemVT.getSizeInBits());
assert(RegSize % InVT.getSizeInBits() == 0);
unsigned NumConcat = RegSize / InVT.getSizeInBits();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
Ops[0] = N->getOperand(1);
SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
DAG.getIntPtrConstant(0, dl)));
return;
}
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
case X86ISD::FMAXC:
case X86ISD::FMAX: {
EVT VT = N->getValueType(0);
assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
SDValue UNDEF = DAG.getUNDEF(VT);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(0), UNDEF);
SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(1), UNDEF);
Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
return;
}
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
if (N->getValueType(0) == MVT::v2i32) {
assert((IsSigned || Subtarget.hasAVX512()) &&
"Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue Src = N->getOperand(0);
if (Src.getValueType() == MVT::v2f64) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI
: X86ISD::CVTTP2UI,
dl, MVT::v4i32, Src);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
if (Src.getValueType() == MVT::v2f32) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
: ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
}
// The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
// so early out here.
return;
}
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode()) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
if (StackSlot.getNode())
Results.push_back(
DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo()));
else
Results.push_back(FIST);
}
return;
}
case ISD::SINT_TO_FP: {
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = N->getOperand(0);
if (N->getValueType(0) != MVT::v2f32 || Src.getValueType() != MVT::v2i64)
return;
Results.push_back(DAG.getNode(X86ISD::CVTSI2P, dl, MVT::v4f32, Src));
return;
}
case ISD::UINT_TO_FP: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
if (VT != MVT::v2f32)
return;
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
Results.push_back(DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v4f32, Src));
return;
}
if (SrcVT != MVT::v2i32)
return;
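// 0x4330000000000000 is the double 2^52. For 0 <= x < 2^32, OR-ing x into
// the low mantissa bits of 2^52 yields the double with value 2^52 + x
// exactly, so subtracting 2^52 recovers x as an exact v2f64, which is
// then rounded to v2f32 below.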
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
}
case ISD::FP_ROUND: {
if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
}
case ISD::FP_EXTEND: {
// Right now, only MVT::v2f32 has OperationAction for FP_EXTEND.
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
return;
}
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
Results);
case Intrinsic::x86_rdpmc:
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
case Intrinsic::x86_xgetbv:
return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
}
}
case ISD::INTRINSIC_WO_CHAIN: {
if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
Results.push_back(V);
return;
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
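// CMPXCHG8B/16B expect the compare value in EDX:EAX (RDX:RAX) and the
// desired value in ECX:EBX (RCX:RBX); on success ZF is set and the new
// value is stored, otherwise ZF is cleared and the current memory value
// is loaded back into EDX:EAX (RDX:RAX). The copies below set up that
// register protocol.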
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, dl, HalfT));
cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(1, dl, HalfT));
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
cpInL, SDValue());
cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
Regs64bit ? X86::RDX : X86::EDX,
cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(0, dl, HalfT));
swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(1, dl, HalfT));
swapInH =
DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX,
swapInH, cpInH.getValue(1));
// If the current function needs the base pointer, RBX,
// we shouldn't use cmpxchg directly: the lowering of that
// instruction clobbers RBX, and since RBX is a reserved
// register the register allocator will not ensure its value
// is properly saved and restored around this live range.
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
SDValue Result;
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
unsigned BasePtr = TRI->getBaseRegister();
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
(BasePtr == X86::RBX || BasePtr == X86::EBX)) {
// ISel prefers the LCMPXCHG64 variant.
// If that assert breaks, that is no longer the case, and we
// need to teach LCMPXCHG8_SAVE_EBX_DAG how to save RBX, not
// just EBX. This is a matter of accepting i64 input for that
// pseudo, and restoring into the register of the right width
// in the expand pseudo. Everything else should just work.
assert(((Regs64bit == (BasePtr == X86::RBX)) || BasePtr == X86::EBX) &&
"Saving only half of RBX");
unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_SAVE_RBX_DAG
: X86ISD::LCMPXCHG8_SAVE_EBX_DAG;
SDValue RBXSave = DAG.getCopyFromReg(swapInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX,
HalfT, swapInH.getValue(1));
SDValue Ops[] = {/*Chain*/ RBXSave.getValue(1), N->getOperand(1), swapInL,
RBXSave,
/*Glue*/ RBXSave.getValue(2)};
Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
} else {
unsigned Opcode =
Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG;
swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX, swapInL,
swapInH.getValue(1));
SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),
swapInL.getValue(1)};
Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
}
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG);
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
Results.push_back(Success);
Results.push_back(EFLAGS.getValue(1));
return;
}
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
}
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
EVT SrcVT = N->getOperand(0)->getValueType(0);
if (SrcVT != MVT::f64 ||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
return;
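// E.g. for f64 -> v2i32: wrap the scalar as v2f64 <x, undef>, bitcast to
// v4i32, and rebuild the v2i32 result from elements 0 and 1, which hold
// the 64 bits of x on little-endian x86.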
unsigned NumElts = DstVT.getVectorNumElements();
EVT SVT = DstVT.getVectorElementType();
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, N->getOperand(0));
SDValue ToVecInt = DAG.getBitcast(WiderVT, Expanded);
if (ExperimentalVectorWideningLegalization) {
// If we are legalizing vectors by widening, we already have the desired
// legal vector type, just return it.
Results.push_back(ToVecInt);
return;
}
SmallVector<SDValue, 8> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
ToVecInt, DAG.getIntPtrConstant(i, dl)));
Results.push_back(DAG.getBuildVector(DstVT, dl, Elts));
}
}
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((X86ISD::NodeType)Opcode) {
case X86ISD::FIRST_NUMBER: break;
case X86ISD::BSF: return "X86ISD::BSF";
case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::CMPM: return "X86ISD::CMPM";
case X86ISD::CMPMU: return "X86ISD::CMPMU";
case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCC: return "X86ISD::FSETCC";
case X86ISD::FSETCCM: return "X86ISD::FSETCCM";
case X86ISD::FSETCCM_RND: return "X86ISD::FSETCCM_RND";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
case X86ISD::IRET: return "X86ISD::IRET";
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP";
case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q";
case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W";
case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D";
case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
case X86ISD::ADDUS: return "X86ISD::ADDUS";
case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAXS: return "X86ISD::FMAXS";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMAXS_RND: return "X86ISD::FMAXS_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMINS: return "X86ISD::FMINS";
case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::FRCPS: return "X86ISD::FRCPS";
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP";
case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP";
case X86ISD::EH_SJLJ_SETUP_DISPATCH:
return "X86ISD::EH_SJLJ_SETUP_DISPATCH";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
case X86ISD::LCMPXCHG8_SAVE_EBX_DAG:
return "X86ISD::LCMPXCHG8_SAVE_EBX_DAG";
case X86ISD::LCMPXCHG16_SAVE_RBX_DAG:
return "X86ISD::LCMPXCHG16_SAVE_RBX_DAG";
case X86ISD::LADD: return "X86ISD::LADD";
case X86ISD::LSUB: return "X86ISD::LSUB";
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
case X86ISD::VSRA: return "X86ISD::VSRA";
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
case X86ISD::VSRAV: return "X86ISD::VSRAV";
case X86ISD::VROTLI: return "X86ISD::VROTLI";
case X86ISD::VROTRI: return "X86ISD::VROTRI";
case X86ISD::VPPERM: return "X86ISD::VPPERM";
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::SMUL8: return "X86ISD::SMUL8";
case X86ISD::UMUL8: return "X86ISD::UMUL8";
case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::VALIGN: return "X86ISD::VALIGN";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
case X86ISD::SHUFP: return "X86ISD::SHUFP";
case X86ISD::SHUF128: return "X86ISD::SHUF128";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::PSADBW: return "X86ISD::PSADBW";
case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::MFENCE: return "X86ISD::MFENCE";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::VPROT: return "X86ISD::VPROT";
case X86ISD::VPROTI: return "X86ISD::VPROTI";
case X86ISD::VPSHA: return "X86ISD::VPSHA";
case X86ISD::VPSHL: return "X86ISD::VPSHL";
case X86ISD::VPCOM: return "X86ISD::VPCOM";
case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
case X86ISD::FNMSUB: return "X86ISD::FNMSUB";
case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB";
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND";
case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND";
case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND";
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND";
case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND";
case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND";
case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND";
case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND";
case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
case X86ISD::FNMSUBS3_RND: return "X86ISD::FNMSUBS3_RND";
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
case X86ISD::VREDUCES: return "X86ISD::VREDUCES";
case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
case X86ISD::SELECT: return "X86ISD::SELECT";
case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP28: return "X86ISD::RCP28";
case X86ISD::RCP28S: return "X86ISD::RCP28S";
case X86ISD::EXP2: return "X86ISD::EXP2";
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND";
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT";
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
case X86ISD::MGATHER: return "X86ISD::MGATHER";
}
return nullptr;
}
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
return false;
if (AM.BaseGV) {
unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV);
// If a reference to this global requires an extra load, we can't fold it.
if (isGlobalStubReference(GVFlags))
return false;
// If BaseGV requires a register for the PIC base, we cannot also have a
// BaseReg specified.
if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
if ((M != CodeModel::Small || isPositionIndependent()) &&
Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
switch (AM.Scale) {
case 0:
case 1:
case 2:
case 4:
case 8:
// These scales always work.
break;
case 3:
case 5:
case 9:
// These scales are formed with basereg+scalereg. Only accept if there is
// no basereg yet.
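// E.g. Scale == 3 is encoded as reg + reg*2 ("lea (%rax,%rax,2), ..."),
// consuming both the base and index slots.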
if (AM.HasBaseReg)
return false;
break;
default: // Other stuff never works.
return false;
}
return true;
}
bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
unsigned Bits = Ty->getScalarSizeInBits();
// 8-bit shifts are always expensive, but versions with a scalar amount aren't
// particularly cheaper than those without.
if (Bits == 8)
return false;
// On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make
// variable shifts just as cheap as scalar ones.
if (Subtarget.hasInt256() && (Bits == 32 || Bits == 64))
return false;
// Otherwise, it's significantly cheaper to shift by a scalar amount than by a
// fully general vector.
return true;
}
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
if (!isTypeLegal(EVT::getEVT(Ty1)))
return false;
assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
// Assuming the caller doesn't have a zeroext or signext return parameter,
// truncation all the way down to i1 is valid.
return true;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
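// E.g. "movl %edi, %eax" clears bits 63:32 of RAX, so the i64 zext of an
// i32 value already in a register costs nothing.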
return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit();
}
bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2))
return true;
if (Val.getOpcode() != ISD::LOAD)
return false;
if (!VT1.isSimple() || !VT1.isInteger() ||
!VT2.isSimple() || !VT2.isInteger())
return false;
switch (VT1.getSimpleVT().SimpleTy) {
default: break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
// X86 has 8, 16, and 32-bit zero-extending loads.
return true;
}
return false;
}
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
if (!Subtarget.hasAnyFMA())
return false;
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
if (!VT.isSimple())
return false;
// Not for i1 vectors
if (VT.getSimpleVT().getScalarType() == MVT::i1)
return false;
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSimpleVT().getSizeInBits() == 64)
return false;
// We only care that the types being shuffled are legal. The lowering can
// handle any possible shuffle mask that results.
return isTypeLegal(VT.getSimpleVT());
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
// Just delegate to the generic legality, clear masks aren't special.
return isShuffleMaskLegal(Mask, VT);
}
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
DebugLoc DL = MI.getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
// For v = xbegin(), we generate
//
// thisMBB:
// xbegin fallMBB
//
// mainMBB:
// s0 = -1
//
// fallMBB:
// eax = # XABORT_DEF
// s1 = eax
//
// sinkMBB:
// v = phi(s0/mainMBB, s1/fallMBB)
MachineBasicBlock *thisMBB = MBB;
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, fallMBB);
MF->insert(I, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned fallDstReg = MRI.createVirtualRegister(RC);
// thisMBB:
// xbegin fallMBB
// # fallthrough to mainMBB
// # abort path branches to fallMBB
BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(fallMBB);
// mainMBB:
// mainDstReg := -1
BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
mainMBB->addSuccessor(sinkMBB);
// fallMBB:
// ; pseudo instruction to model hardware's definition from XABORT
// EAX := XABORT_DEF
// fallDstReg := EAX
BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
.addReg(X86::EAX);
fallMBB->addSuccessor(sinkMBB);
// sinkMBB:
// DstReg := phi(mainDstReg/mainMBB, fallDstReg/fallMBB)
BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(fallDstReg).addMBB(fallMBB);
MI.eraseFromParent();
return sinkMBB;
}
// FIXME: When we get size-specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX, all of this code can be replaced with that
// in the .td file.
static MachineBasicBlock *emitPCMPSTRM(MachineInstr &MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII) {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break;
case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break;
case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break;
case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break;
case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break;
case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break;
case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break;
case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break;
}
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
unsigned NumArgs = MI.getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(X86::XMM0);
MI.eraseFromParent();
return BB;
}
// FIXME: Custom handling because TableGen doesn't support multiple implicit
// defs in an instruction pattern
static MachineBasicBlock *emitPCMPSTRI(MachineInstr &MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII) {
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break;
case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break;
case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break;
case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break;
case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break;
case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break;
case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break;
case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break;
}
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
unsigned NumArgs = MI.getNumOperands(); // operand 0 is the result; skipped below
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(X86::ECX);
MI.eraseFromParent();
return BB;
}
static MachineBasicBlock *emitWRPKRU(MachineInstr &MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget) {
DebugLoc dl = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// insert input VAL into EAX
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(0).getReg());
// insert zero into ECX
BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
// insert zero into EDX
BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::EDX);
// insert WRPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget) {
DebugLoc dl = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// insert zero into ECX
BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
// insert RDPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
.addReg(X86::EAX);
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget,
unsigned Opc) {
DebugLoc dl = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// Address into RAX/EAX, other two args into ECX, EDX.
unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
unsigned ValOps = X86::AddrNumOperands;
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(ValOps).getReg());
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
.addReg(MI.getOperand(ValOps + 1).getReg());
// The instruction itself takes no explicit operands; its inputs are implicit.
BuildMI(*BB, MI, dl, TII->get(Opc));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget &Subtarget) {
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// Address into RAX/EAX
unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI->getOperand(i));
// The instruction itself takes no explicit operands; its inputs are implicit.
BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
// 0 ) Output : destination address (reg)
// 1-5) Input : va_list address (addr, i64mem)
// 6 ) ArgSize : Size (in bytes) of vararg type
// 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
// 8 ) Align : Alignment of type
// 9 ) EFLAGS (implicit-def)
assert(MI.getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
static_assert(X86::AddrNumOperands == 5,
"VAARG_64 assumes 5 address operands");
unsigned DestReg = MI.getOperand(0).getReg();
MachineOperand &Base = MI.getOperand(1);
MachineOperand &Scale = MI.getOperand(2);
MachineOperand &Index = MI.getOperand(3);
MachineOperand &Disp = MI.getOperand(4);
MachineOperand &Segment = MI.getOperand(5);
unsigned ArgSize = MI.getOperand(6).getImm();
unsigned ArgMode = MI.getOperand(7).getImm();
unsigned Align = MI.getOperand(8).getImm();
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
DebugLoc DL = MI.getDebugLoc();
// struct va_list {
// i32 gp_offset
// i32 fp_offset
// i64 overflow_area (address)
// i64 reg_save_area (address)
// }
// sizeof(va_list) = 24
// alignment(va_list) = 8
unsigned TotalNumIntRegs = 6;
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
unsigned MaxOffset = TotalNumIntRegs * 8 +
(UseFPOffset ? TotalNumXMMRegs * 16 : 0);
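// e.g. with fp_offset in use: MaxOffset = 6 * 8 + 8 * 16 = 176 bytes.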
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
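// e.g. ArgSize = 12 rounds up to ArgSizeA8 = 16.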
bool NeedsAlign = (Align > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
MachineBasicBlock *offsetMBB;
MachineBasicBlock *endMBB;
unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
unsigned OffsetReg = 0;
if (!UseGPOffset && !UseFPOffset) {
// If we only pull from the overflow region, we don't create a branch;
// there's no need to alter control flow.
OffsetDestReg = 0; // unused
OverflowDestReg = DestReg;
offsetMBB = nullptr;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
// First emit code to check if gp_offset (or fp_offset) is below the bound.
// If so, pull the argument from reg_save_area. (branch to offsetMBB)
// If not, pull from overflow_area. (branch to overflowMBB)
//
// thisMBB
// | .
// | .
// offsetMBB overflowMBB
// | .
// | .
// endMBB
// Registers for the PHI in endMBB
OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = ++MBB->getIterator();
// Insert the new basic blocks
MF->insert(MBBIter, offsetMBB);
MF->insert(MBBIter, overflowMBB);
MF->insert(MBBIter, endMBB);
// Transfer the remainder of MBB and its successor edges to endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Make offsetMBB and overflowMBB successors of thisMBB
thisMBB->addSuccessor(offsetMBB);
thisMBB->addSuccessor(overflowMBB);
// endMBB is a successor of both offsetMBB and overflowMBB
offsetMBB->addSuccessor(endMBB);
overflowMBB->addSuccessor(endMBB);
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
.addMBB(overflowMBB);
}
// In offsetMBB, emit code to use the reg_save_area.
if (offsetMBB) {
assert(OffsetReg != 0);
// Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 16)
.add(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
// Compute the offset for the next argument
unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
.setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
//
// Emit code to use overflow area
//
// Load the overflow_area address into a register.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
if (NeedsAlign) {
// Align the overflow address
assert(isPowerOf2_32(Align) && "Alignment must be a power of 2");
unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
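// e.g. Align = 16: addr 0x1001 -> (0x1001 + 15) & ~15 = 0x1010.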
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
.addReg(OverflowAddrReg)
.addImm(Align-1);
BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Align-1));
} else {
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
.add(Base)
.add(Scale)
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
.setMemRefs(MMOBegin, MMOEnd);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
BuildMI(*endMBB, endMBB->begin(), DL,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
}
// Erase the pseudo instruction
MI.eraseFromParent();
return endMBB;
}
MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
// Emit code to save XMM registers to the stack. The ABI says that the
// number of registers to save is given in %al, so it's theoretically
// possible to do an indirect jump trick to avoid saving all of them;
// however, this code takes a simpler approach and just executes all
// of the stores if %al is non-zero. It's less code, it's probably
// easier on the hardware branch predictor, and stores aren't all that
// expensive anyway.
// Create the new basic blocks. One block contains all the XMM stores,
// and one block is the final destination regardless of whether any
// stores were performed.
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *F = MBB->getParent();
MachineFunction::iterator MBBIter = ++MBB->getIterator();
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
// Transfer the remainder of MBB and its successor edges to EndMBB.
EndMBB->splice(EndMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(XMMSaveMBB);
// The XMMSaveMBB will fall through to the end block.
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
unsigned CountReg = MI.getOperand(0).getReg();
int64_t RegSaveFrameIndex = MI.getOperand(1).getImm();
int64_t VarArgsFPOffset = MI.getOperand(2).getImm();
if (!Subtarget.isCallingConvWin64(F->getFunction()->getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
MBB->addSuccessor(EndMBB);
}
// Make sure the last operand is EFLAGS, which gets clobbered by the branch
// that was just emitted, but clearly shouldn't be "saved".
assert((MI.getNumOperands() <= 3 ||
!MI.getOperand(MI.getNumOperands() - 1).isReg() ||
MI.getOperand(MI.getNumOperands() - 1).getReg() == X86::EFLAGS) &&
"Expected last argument to be EFLAGS");
unsigned MOVOpc = Subtarget.hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
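// (The VEX-encoded form is used when AVX is available to avoid SSE/AVX
// transition penalties.)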
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI.getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO = F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(/*Scale=*/1)
.addReg(/*IndexReg=*/0)
.addImm(/*Disp=*/Offset)
.addReg(/*Segment=*/0)
.addReg(MI.getOperand(i).getReg())
.addMemOperand(MMO);
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return EndMBB;
}
// The EFLAGS operand of SelectItr might be missing a kill marker
// because there were multiple uses of EFLAGS, and ISel didn't know
// which to mark. Figure out whether SelectItr should have had a
// kill marker, and set it if it should. Returns the correct kill
// marker value.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock* BB,
const TargetRegisterInfo* TRI) {
// Scan forward through BB for a use/def of EFLAGS.
MachineBasicBlock::iterator miI(std::next(SelectItr));
for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
const MachineInstr& mi = *miI;
if (mi.readsRegister(X86::EFLAGS))
return false;
if (mi.definesRegister(X86::EFLAGS))
break; // Should have kill-flag - update below.
}
// If we hit the end of the block, check whether EFLAGS is live into a
// successor.
if (miI == BB->end()) {
for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
sEnd = BB->succ_end();
sItr != sEnd; ++sItr) {
MachineBasicBlock* succ = *sItr;
if (succ->isLiveIn(X86::EFLAGS))
return false;
}
}
// We found a def, or hit the end of the basic block and EFLAGS wasn't live
// out. SelectMI should have a kill flag on EFLAGS.
SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
return true;
}
// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
// together with other CMOV pseudo-opcodes into a single basic-block with
// conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
case X86::CMOV_V4F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
case X86::CMOV_V8F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
case X86::CMOV_V8I1:
case X86::CMOV_V16I1:
case X86::CMOV_V32I1:
case X86::CMOV_V64I1:
return true;
default:
return false;
}
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC copy1MBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
// This code lowers all pseudo-CMOV instructions. Generally it lowers these
// as described above, by inserting a BB, and then making a PHI at the join
// point to select the true and false operands of the CMOV in the PHI.
//
// The code also handles two different cases of multiple CMOV opcodes
// in a row.
//
// Case 1:
// In this case, there are multiple CMOVs in a row, all which are based on
// the same condition setting (or the exact opposite condition setting).
// In this case we can lower all the CMOVs using a single inserted BB, and
// then make a number of PHIs at the join point to model the CMOVs. The only
// trickiness here is that in a case like:
//
// t2 = CMOV cond1 t1, f1
// t3 = CMOV cond1 t2, f2
//
// when rewriting this into PHIs, we have to perform some renaming on the
// temps since you cannot have a PHI operand refer to a PHI result earlier
// in the same block. The "simple" but wrong lowering would be:
//
// t2 = PHI t1(BB1), f1(BB2)
// t3 = PHI t2(BB1), f2(BB2)
//
// but clearly t2 is not defined in BB1, so that is incorrect. The proper
// renaming is to note that on the path through BB1, t2 is really just a
// copy of t1, and do that renaming, properly generating:
//
// t2 = PHI t1(BB1), f1(BB2)
// t3 = PHI t1(BB1), f2(BB2)
//
// In case 2, we lower cascaded CMOVs such as
//
// (CMOV (CMOV F, T, cc1), T, cc2)
//
// to two successive branches. For that, we look for another CMOV as the
// following instruction.
//
// Without this, we would add a PHI between the two jumps, which ends up
// creating a few copies all around. For instance, for
//
// (sitofp (zext (fcmp une)))
//
// we would generate:
//
// ucomiss %xmm1, %xmm0
// movss <1.0f>, %xmm0
// movaps %xmm0, %xmm1
// jne .LBB5_2
// xorps %xmm1, %xmm1
// .LBB5_2:
// jp .LBB5_4
// movaps %xmm1, %xmm0
// .LBB5_4:
// retq
//
// because this custom-inserter would have generated:
//
// A
// | \
// | B
// | /
// C
// | \
// | D
// | /
// E
//
// A: X = ...; Y = ...
// B: empty
// C: Z = PHI [X, A], [Y, B]
// D: empty
// E: PHI [X, C], [Z, D]
//
// If we lower both CMOVs in a single step, we can instead generate:
//
// A
// | \
// | C
// | /|
// |/ |
// | |
// | D
// | /
// E
//
// A: X = ...; Y = ...
// D: empty
// E: PHI [X, A], [X, C], [Y, D]
//
// Which, in our sitofp/fcmp example, gives us something like:
//
// ucomiss %xmm1, %xmm0
// movss <1.0f>, %xmm0
// jne .LBB5_4
// jp .LBB5_4
// xorps %xmm0, %xmm0
// .LBB5_4:
// retq
//
MachineInstr *CascadedCMOV = nullptr;
MachineInstr *LastCMOV = &MI;
X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
// Check for case 1, where there are multiple CMOVs with the same condition
// first. Of the two cases of multiple CMOV lowerings, case 1 reduces the
// number of jumps the most.
if (isCMOVPseudo(MI)) {
// See if we have a string of CMOVS with the same condition.
while (NextMIIt != BB->end() && isCMOVPseudo(*NextMIIt) &&
(NextMIIt->getOperand(3).getImm() == CC ||
NextMIIt->getOperand(3).getImm() == OppCC)) {
LastCMOV = &*NextMIIt;
++NextMIIt;
}
}
// Check for case 2, but only if we didn't already find case 1, as indicated
// by LastCMOV still pointing at MI.
if (LastCMOV == &MI && NextMIIt != BB->end() &&
NextMIIt->getOpcode() == MI.getOpcode() &&
NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&
NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&
NextMIIt->getOperand(1).isKill()) {
CascadedCMOV = &*NextMIIt;
}
MachineBasicBlock *jcc1MBB = nullptr;
// If we have a cascaded CMOV, we lower it to two successive branches to
// the same block. EFLAGS is used by both, so mark it as live in the second.
if (CascadedCMOV) {
jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, jcc1MBB);
jcc1MBB->addLiveIn(X86::EFLAGS);
}
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
if (CascadedCMOV) {
// The fallthrough block may be jcc1MBB, if we have a cascaded CMOV.
BB->addSuccessor(jcc1MBB);
// In that case, jcc1MBB will itself either fall through to copy0MBB or
// jump to the sinkMBB.
jcc1MBB->addSuccessor(copy0MBB);
jcc1MBB->addSuccessor(sinkMBB);
} else {
BB->addSuccessor(copy0MBB);
}
// The true block target of the first (or only) branch is always sinkMBB.
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
unsigned Opc = X86::GetCondBranchFromCond(CC);
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
if (CascadedCMOV) {
unsigned Opc2 = X86::GetCondBranchFromCond(
(X86::CondCode)CascadedCMOV->getOperand(3).getImm());
BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
}
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
copy0MBB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
MachineBasicBlock::iterator MIItEnd =
std::next(MachineBasicBlock::iterator(LastCMOV));
MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
MachineInstrBuilder MIB;
// As we are creating the PHIs, we have to be careful if there is more than
// one. Later CMOVs may reference the results of earlier CMOVs, but later
// PHIs have to reference the individual true/false inputs from earlier PHIs.
// That also means that PHI construction must work forward from earlier to
// later, and that the code must maintain a mapping from each earlier PHI's
// destination register to the registers that went into that PHI.
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
unsigned DestReg = MIIt->getOperand(0).getReg();
unsigned Op1Reg = MIIt->getOperand(1).getReg();
unsigned Op2Reg = MIIt->getOperand(2).getReg();
// If this CMOV we are generating is the opposite condition from
// the jump we generated, then we have to swap the operands for the
// PHI that is going to be generated.
if (MIIt->getOperand(3).getImm() == OppCC)
std::swap(Op1Reg, Op2Reg);
if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
Op1Reg = RegRewriteTable[Op1Reg].first;
if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
Op2Reg = RegRewriteTable[Op2Reg].second;
MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
TII->get(X86::PHI), DestReg)
.addReg(Op1Reg).addMBB(copy0MBB)
.addReg(Op2Reg).addMBB(thisMBB);
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
}
// If we have a cascaded CMOV, the second Jcc provides the same incoming
// value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
if (CascadedCMOV) {
MIB.addReg(MI.getOperand(2).getReg()).addMBB(jcc1MBB);
// Copy the PHI result to the register defined by the second CMOV.
BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
DL, TII->get(TargetOpcode::COPY),
CascadedCMOV->getOperand(0).getReg())
.addReg(MI.getOperand(0).getReg());
CascadedCMOV->eraseFromParent();
}
// Now remove the CMOV(s).
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
(MIIt++)->eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Combine the following atomic floating-point modification pattern:
// a.store(reg OP a.load(acquire), release)
// Transform it into:
// OPss (%gpr), %xmm
// movss %xmm, (%gpr)
// Or sd equivalent for 64-bit operations.
unsigned MOp, FOp;
switch (MI.getOpcode()) {
default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
case X86::RELEASE_FADD32mr:
FOp = X86::ADDSSrm;
MOp = X86::MOVSSmr;
break;
case X86::RELEASE_FADD64mr:
FOp = X86::ADDSDrm;
MOp = X86::MOVSDmr;
break;
}
const X86InstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned ValOpIdx = X86::AddrNumOperands;
unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(FOp),
MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
.addReg(VSrc);
for (int i = 0; i < X86::AddrNumOperands; ++i) {
MachineOperand &Operand = MI.getOperand(i);
// Clear any kill flags on register operands as we'll create a second
// instruction using the same address operands.
if (Operand.isReg())
Operand.setIsKill(false);
MIB.add(Operand);
}
MachineInstr *FOpMI = MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
for (int i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
const bool Is64Bit = Subtarget.is64Bit();
const bool IsLP64 = Subtarget.isTarget64BitLP64();
const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
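// TLS slot holding the stack limit for segmented stacks; the three offsets
// correspond to LP64, 64-bit ILP32, and 32-bit targets respectively.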
// BB:
// ... [Till the alloca]
// If stacklet is not large enough, jump to mallocMBB
//
// bumpMBB:
// Allocate by subtracting from RSP
// Jump to continueMBB
//
// mallocMBB:
// Allocate by call to runtime
//
// continueMBB:
// ...
// [rest of original BB]
//
MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MF->getDataLayout()));
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI.getOperand(1).getReg(),
physSPReg =
IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;
MachineFunction::iterator MBBIter = ++BB->getIterator();
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
MF->insert(MBBIter, continueMBB);
continueMBB->splice(continueMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
continueMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EDI, RegState::Implicit)
.addReg(X86::EAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
BB->addSuccessor(mallocMBB);
mallocMBB->addSuccessor(continueMBB);
bumpMBB->addSuccessor(continueMBB);
// Take care of the PHI nodes.
BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
MI.getOperand(0).getReg())
.addReg(mallocPtrVReg)
.addMBB(mallocMBB)
.addReg(bumpSPPtrVReg)
.addMBB(bumpMBB);
// Delete the original pseudo instruction.
MI.eraseFromParent();
// And we're done.
return continueMBB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
DebugLoc DL = MI.getDebugLoc();
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF->getFunction()->getPersonalityFn())) &&
"SEH does not use catchret!");
// Only 32-bit EH needs to worry about manually restoring stack pointers.
if (!Subtarget.is32Bit())
return BB;
// C++ EH creates a new target block to hold the restore code, and wires up
// the new block to the return destination with a normal JMP_4.
MachineBasicBlock *RestoreMBB =
MF->CreateMachineBasicBlock(BB->getBasicBlock());
assert(BB->succ_size() == 1);
MF->insert(std::next(BB->getIterator()), RestoreMBB);
RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(RestoreMBB);
MI.getOperand(0).setMBB(RestoreMBB);
auto RestoreMBBI = RestoreMBB->begin();
BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchPad(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const Constant *PerFn = MF->getFunction()->getPersonalityFn();
bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
// Only 32-bit SEH requires special handling for catchpad.
if (IsSEH && Subtarget.is32Bit()) {
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
}
MI.eraseFromParent();
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const {
// So, here we replace TLSADDR with the sequence:
// adjust_stackdown -> TLSADDR -> adjust_stackup.
// We need this because TLSADDR is lowered into a call
// inside MC; without the two markers, shrink-wrapping
// may push the prologue/epilogue past them.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
MachineFunction &MF = *BB->getParent();
// Emit CALLSEQ_START right before the instruction.
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
MachineInstrBuilder CallseqStart =
BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
// Emit CALLSEQ_END right after the instruction.
// We don't call erase from parent because we want to keep the
// original instruction around.
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MachineInstrBuilder CallseqEnd =
BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This is pretty easy. We're taking the value that we received from
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
Subtarget.is64Bit() ?
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
unsigned DstReg;
unsigned MemOpndSlot = 0;
unsigned CurOp = 0;
DstReg = MI.getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
(void)TRI;
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
MemOpndSlot = CurOp;
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
// thisMBB:
// buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
// v_main = 0
//
// sinkMBB:
// v = phi(main, restore)
//
// restoreMBB:
// if base pointer being used, load it from frame
// v_restore = 1
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
restoreMBB->setHasAddressTaken();
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
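// The resume address goes in the buffer's second pointer-sized slot; the
// matching longjmp below reloads FP, IP, and SP from slots 0, 1, and 2.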
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
!isPositionIndependent();
// Prepare IP either in reg or imm.
if (!UseImmLabel) {
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addMBB(restoreMBB)
.addReg(0);
} else {
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
.addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
.addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.add(MI.getOperand(MemOpndSlot + i));
}
if (!UseImmLabel)
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
// mainMBB:
// EAX = 0
BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
const bool Uses64BitFramePtr =
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
unsigned FramePtr = RegInfo->getFrameRegister(*MF);
unsigned BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI.eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as a GPR.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
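// Buffer layout as consumed here: slot 0 = saved FP, slot 1 = resume IP,
// slot 2 = saved SP.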
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
// Reload FP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
MIB.add(MI.getOperand(i));
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), LabelOffset);
else
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Jump
BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
MI.eraseFromParent();
return MBB;
}
void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
unsigned Op = 0;
unsigned VR = 0;
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
!isPositionIndependent();
if (UseImmLabel) {
Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
} else {
const TargetRegisterClass *TRC =
(PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
VR = MRI->createVirtualRegister(TRC);
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
if (Subtarget.is64Bit())
BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addMBB(DispatchBB)
.addReg(0);
else
BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
.addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
.addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
.addReg(0);
}
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
addFrameReference(MIB, FI, 36);
if (UseImmLabel)
MIB.addMBB(DispatchBB);
else
MIB.addReg(VR);
}
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
int FI = MFI.getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (auto &MBB : *MF) {
if (!MBB.isEHPad())
continue;
MCSymbol *Sym = nullptr;
for (const auto &MI : MBB) {
if (MI.isDebugValue())
continue;
assert(MI.isEHLabel() && "expected EH_LABEL");
Sym = MI.getOperand(0).getMCSymbol();
break;
}
if (!MF->hasCallSiteLandingPad(Sym))
continue;
for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
CallSiteNumToLPad[CSI].push_back(&MBB);
MaxCSNum = std::max(MaxCSNum, CSI);
}
}
// Get an ordered list of the machine basic blocks for the jump table.
std::vector<MachineBasicBlock *> LPadList;
SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
for (auto &LP : CallSiteNumToLPad[CSI]) {
LPadList.push_back(LP);
InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
}
}
assert(!LPadList.empty() &&
"No landing pad destinations for the dispatch jump table!");
// Create the MBBs for the dispatch code.
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
DispatchBB->setIsEHPad(true);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
BuildMI(TrapBB, DL, TII->get(X86::TRAP));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
// Insert MBBs.
MF->push_back(DispatchBB);
MF->push_back(DispContBB);
MF->push_back(TrapBB);
// Insert code into the entry block that creates and registers the function
// context.
SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI);
// Create the jump table and associated information
MachineJumpTableInfo *JTI =
MF->getOrCreateJumpTableInfo(getJumpTableEncoding());
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
const X86RegisterInfo &RI = TII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered.
if (RI.hasBasePointer(*MF)) {
const bool FPIs64Bit =
Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setRestoreBasePointer(MF);
unsigned FP = RI.getFrameRegister(*MF);
unsigned BP = RI.getBaseRegister();
unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
MFI->getRestoreBasePointerOffset())
.addRegMask(RI.getNoPreservedMask());
} else {
BuildMI(DispatchBB, DL, TII->get(X86::NOOP))
.addRegMask(RI.getNoPreservedMask());
}
unsigned IReg = MRI->createVirtualRegister(&X86::GR32RegClass);
addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
4);
BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
.addReg(IReg)
.addImm(LPadList.size());
BuildMI(DispatchBB, DL, TII->get(X86::JA_1)).addMBB(TrapBB);
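// Call-site indices are 1-based; subtract one to form the 0-based
// jump-table index.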
unsigned JReg = MRI->createVirtualRegister(&X86::GR32RegClass);
BuildMI(DispContBB, DL, TII->get(X86::SUB32ri), JReg)
.addReg(IReg)
.addImm(1);
BuildMI(DispContBB, DL,
TII->get(Subtarget.is64Bit() ? X86::JMP64m : X86::JMP32m))
.addReg(0)
.addImm(Subtarget.is64Bit() ? 8 : 4)
.addReg(JReg)
.addJumpTableIndex(MJTI)
.addReg(0);
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
for (auto &LP : LPadList)
if (SeenMBBs.insert(LP).second)
DispContBB->addSuccessor(LP);
// N.B. the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
// Keep a copy of Successors since it's modified inside the loop.
SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
MBB->succ_rend());
// FIXME: Avoid quadratic complexity.
for (auto MBBS : Successors) {
if (MBBS->isEHPad()) {
MBB->removeSuccessor(MBBS);
MBBLPads.push_back(MBBS);
}
}
MBB->addSuccessor(DispatchBB);
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
// moving instructions to before the EH block, where they will never be
// executed.
for (auto &II : reverse(*MBB)) {
if (!II.isCall())
continue;
DenseMap<unsigned, bool> DefRegs;
for (auto &MOp : II.operands())
if (MOp.isReg())
DefRegs[MOp.getReg()] = true;
MachineInstrBuilder MIB(*MF, &II);
for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
unsigned Reg = SavedRegs[RI];
if (!DefRegs[Reg])
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
for (auto &LP : MBBLPads)
LP->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
return BB;
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:
return EmitLoweredCatchPad(MI, BB);
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_FR128:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::CMOV_RFP64:
case X86::CMOV_RFP80:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
case X86::CMOV_V4F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
case X86::CMOV_V8F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
case X86::CMOV_V8I1:
case X86::CMOV_V16I1:
case X86::CMOV_V32I1:
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
unsigned PushF =
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
// Permit reads of the FLAGS register without it being defined.
// This intrinsic exists to read external processor state in flags, such as
// the trap flag, interrupt flag, and direction flag, none of which are
// modeled by the backend.
Push->getOperand(2).setIsUndef();
BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
unsigned Push =
MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(PopF));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::RELEASE_FADD32mr:
case X86::RELEASE_FADD64mr:
return EmitLoweredAtomicFP(MI, BB);
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
case X86::FP64_TO_INT16_IN_MEM:
case X86::FP64_TO_INT32_IN_MEM:
case X86::FP64_TO_INT64_IN_MEM:
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
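// A 2-byte, 2-byte-aligned stack slot to hold the x87 control word.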
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the control word...
unsigned OldCW =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the rounding mode to round toward zero...
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
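// 0xC7F keeps the exception mask bits set and forces the rounding-control
// field (bits 11:10) to 11b, i.e. round toward zero.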
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
.addReg(MI.getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FLDCW16m)), CWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
assert(Subtarget.hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
return emitPCMPSTRM(MI, BB, Subtarget.getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
case X86::VPCMPISTRIREG:
case X86::PCMPISTRIMEM:
case X86::VPCMPISTRIMEM:
case X86::PCMPESTRIREG:
case X86::VPCMPESTRIREG:
case X86::PCMPESTRIMEM:
case X86::VPCMPESTRIMEM:
assert(Subtarget.hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
return emitPCMPSTRI(MI, BB, Subtarget.getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
case X86::MONITORX:
return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
// Cache line zero
case X86::CLZERO:
return emitClzero(&MI, BB, Subtarget);
// PKU feature
case X86::WRPKRU:
return emitWRPKRU(MI, BB, Subtarget);
case X86::RDPKRU:
return emitRDPKRU(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
return emitXBegin(MI, BB, Subtarget.getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
case X86::VAARG_64:
return EmitVAARG64WithCustomInserter(MI, BB);
case X86::EH_SjLj_SetJmp32:
case X86::EH_SjLj_SetJmp64:
return emitEHSjLjSetJmp(MI, BB);
case X86::EH_SjLj_LongJmp32:
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
case X86::Int_eh_sjlj_setup_dispatch:
return EmitSjLjDispatchBlock(MI, BB);
case TargetOpcode::STATEPOINT:
// As an implementation detail, STATEPOINT shares the STACKMAP format at
// this point in the process. We diverge later.
return emitPatchPoint(MI, BB);
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case TargetOpcode::PATCHABLE_EVENT_CALL:
// Do nothing here, handle in xray instrumentation pass.
return BB;
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
// In addition to the four E[ABCD] registers implied by its encoding,
// CMPXCHG8B requires a memory operand. If the current architecture is
// i686 and the current function needs a base pointer - which is ESI on
// i686 - the register allocator cannot allocate registers for an address
// of the form X(%reg, %reg, Y): there would never be enough unreserved
// registers during regalloc (without the base pointer, the only option
// would be X(%edi, %esi, Y)).
// We give the register allocator a hand by precomputing the address in
// a new vreg using LEA.
// If this is not i686 or there is no base pointer, there is nothing to do.
if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
return BB;
// Even though this code does not necessarily need the base pointer to
// be ESI, we check for that. The reason: if this assert fails, something
// has changed in the compiler's base pointer handling, and it most
// probably has to be addressed here as well.
assert(TRI->getBaseRegister() == X86::ESI &&
"LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
"base pointer in mind");
MachineRegisterInfo &MRI = MF->getRegInfo();
MVT SPTy = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
X86AddressMode AM = getAddressFromInstr(&MI, 0);
// Regalloc does not need any help when the memory operand of CMPXCHG8B
// does not use an index register.
if (AM.IndexReg == X86::NoRegister)
return BB;
// After X86TargetLowering::ReplaceNodeResults, CMPXCHG8B is glued to its
// four operand definitions that are E[ABCD] registers. We skip them and
// then insert the LEA.
MachineBasicBlock::iterator MBBI(MI);
while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
--MBBI;
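// MBBI now points above the glued E[ABCD] copies; the LEA goes in before
// them so the address is available when CMPXCHG8B executes.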
addFullAddress(
BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
setDirectAddressInInstr(&MI, 0, computedAddrVReg);
return BB;
}
case X86::LCMPXCHG16B:
return BB;
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =
MI.getOpcode() == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
if (!BB->isLiveIn(BasePtr))
BB->addLiveIn(BasePtr);
return BB;
}
}
}
//===----------------------------------------------------------------------===//
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
LLVM_FALLTHROUGH;
case X86ISD::SETCC:
Known.Zero.setBitsFrom(1);
break;
case X86ISD::MOVMSK: {
unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
Known.Zero.setBitsFrom(NumLoBits);
break;
}
case X86ISD::VSHLI:
case X86ISD::VSRLI: {
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
Known.setAllZero();
break;
}
DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
unsigned ShAmt = ShiftImm->getZExtValue();
if (Opc == X86ISD::VSHLI) {
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits are known zero.
Known.Zero.setLowBits(ShAmt);
} else {
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits are known zero.
Known.Zero.setHighBits(ShAmt);
}
}
break;
}
case X86ISD::VZEXT: {
SDValue N0 = Op.getOperand(0);
unsigned NumElts = VT.getVectorNumElements();
EVT SrcVT = N0.getValueType();
unsigned InNumElts = SrcVT.getVectorNumElements();
unsigned InBitWidth = SrcVT.getScalarSizeInBits();
assert(InNumElts >= NumElts && "Illegal VZEXT input");
Known = KnownBits(InBitWidth);
APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBitWidth);
break;
}
}
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
unsigned VTBits = Op.getScalarValueSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::SETCC_CARRY:
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
return VTBits;
case X86ISD::VSEXT: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
Tmp += VTBits - Src.getScalarValueSizeInBits();
return Tmp;
}
case X86ISD::VSHLI: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
if (ShiftVal.uge(VTBits))
return VTBits; // Shifted all bits out --> zero.
if (ShiftVal.uge(Tmp))
return 1; // Shifted all sign bits out --> unknown.
return Tmp - ShiftVal.getZExtValue();
}
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
ShiftVal += Tmp;
return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
case X86ISD::PCMPGT:
case X86ISD::PCMPEQ:
case X86ISD::CMPP:
case X86ISD::VPCOM:
case X86ISD::VPCOMU:
// Vector compares return zero/all-bits result values.
return VTBits;
}
// Fallback case.
return 1;
}
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
const GlobalValue* &GA,
int64_t &Offset) const {
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
return true;
}
}
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
SDValue &V1, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
// Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool Match = true;
unsigned NumDstElts = NumMaskElts / Scale;
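// A zero-extension by Scale keeps element i at position i*Scale and fills
// the intervening positions with zero or undef.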
for (unsigned i = 0; i != NumDstElts && Match; ++i) {
Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
if (Match) {
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize);
if (SrcVT != MaskVT)
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
Shuffle = SrcVT != MaskVT ? unsigned(X86ISD::VZEXT)
: unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
return true;
}
}
}
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
// Check if we have SSE3, which will let us use MOVDDUP etc. These
// instructions are no slower than UNPCKLPD but have the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3})) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v4f32;
return true;
}
}
if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v4f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7})) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v8f32;
return true;
}
}
if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14})) {
Shuffle = X86ISD::MOVSLDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15})) {
Shuffle = X86ISD::MOVSHDUP;
SrcVT = DstVT = MVT::v16f32;
return true;
}
}
// Attempt to match against broadcast-from-vector.
if (Subtarget.hasAVX2()) {
SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
SrcVT = DstVT = MaskVT;
Shuffle = X86ISD::VBROADCAST;
return true;
}
}
return false;
}
// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned InputSizeInBits = MaskVT.getSizeInBits();
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
bool ContainsZeros =
llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
// Handle VPERMI/VPERMILPD vXi64/vXf64 patterns.
if (!ContainsZeros && MaskScalarSizeInBits == 64) {
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
PermuteImm = getV4X86ShuffleImm(RepeatedMask);
return true;
}
}
} else if (AllowFloatDomain && Subtarget.hasAVX()) {
// VPERMILPD can permute with a non-repeating shuffle.
Shuffle = X86ISD::VPERMILPI;
ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
PermuteImm = 0;
for (int i = 0, e = Mask.size(); i != e; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
PermuteImm |= (M & 1) << i;
}
return true;
}
}
// Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
// AVX introduced the VPERMILPD/VPERMILPS float permutes; before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
!ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
// Narrow the repeated mask to create 32-bit element permutes.
SmallVector<int, 4> WordMask = RepeatedMask;
if (MaskScalarSizeInBits == 64)
scaleShuffleMask(2, RepeatedMask, WordMask);
Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
}
}
// Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
SmallVector<int, 4> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
ArrayRef<int> LoMask(Mask.data() + 0, 4);
ArrayRef<int> HiMask(Mask.data() + 4, 4);
// PSHUFLW: permute lower 4 elements only.
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
Shuffle = X86ISD::PSHUFLW;
ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(LoMask);
return true;
}
// PSHUFHW: permute upper 4 elements only.
if (isUndefOrInRange(HiMask, 4, 8) &&
isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
// Offset the HiMask so that we can create the shuffle immediate.
int OffsetHiMask[4];
for (int i = 0; i != 4; ++i)
OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
Shuffle = X86ISD::PSHUFHW;
ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
return true;
}
}
}
// Attempt to match against byte/bit shifts.
// FIXME: Add 512-bit support.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
MaskScalarSizeInBits, Mask,
0, Zeroable, Subtarget);
if (0 < ShiftAmt) {
PermuteImm = (unsigned)ShiftAmt;
return true;
}
}
return false;
}
// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVLHPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
ShuffleVT = MaskVT;
return true;
}
if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
(AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
ShuffleVT = MaskVT;
return true;
}
}
// Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
(MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL,
DAG, Subtarget)) {
ShuffleVT = MaskVT;
if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2())
ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
return true;
}
}
return false;
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const APInt &Zeroable,
bool AllowFloatDomain,
bool AllowIntDomain,
SDValue &V1, SDValue &V2, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
// Attempt to match against PALIGNR byte rotate.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
PermuteImm = ByteRotation;
return true;
}
}
// Attempt to combine to X86ISD::BLENDI.
if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
(Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
(MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
SmallVector<int, 8> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
RepeatedMask)) {
assert(RepeatedMask.size() == 8 &&
"Repeated mask size doesn't match!");
PermuteImm = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
PermuteImm |= 1 << i;
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
Shuffle = X86ISD::BLENDI;
ShuffleVT = MaskVT;
return true;
}
} else {
// Determine a type compatible with X86ISD::BLENDI.
ShuffleVT = MaskVT;
if (Subtarget.hasAVX2()) {
if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v8i32;
else if (ShuffleVT == MVT::v2i64)
ShuffleVT = MVT::v4i32;
} else {
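// Pre-AVX2 targets have no 32/64-bit integer blends, so fall back to
// PBLENDW or the floating-point BLENDPD/BLENDPS forms.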
if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
ShuffleVT = MVT::v8i16;
else if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v4f64;
else if (ShuffleVT == MVT::v8i32)
ShuffleVT = MVT::v8f32;
}
if (!ShuffleVT.isFloatingPoint()) {
int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
BlendMask =
scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
}
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
return true;
}
}
}
// Attempt to combine to INSERTPS.
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector()) {
if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
return true;
}
}
// Attempt to combine to SHUFPD.
if (AllowFloatDomain && EltSizeInBits == 64 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
}
}
// Attempt to combine to SHUFPS.
if (AllowFloatDomain && EltSizeInBits == 32 &&
((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
SmallVector<int, 4> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
// Match each half of the repeated mask to determine whether it just
// references one of the vectors, is zeroable, or is entirely undef.
auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
int M0 = RepeatedMask[Offset];
int M1 = RepeatedMask[Offset + 1];
if (isUndefInRange(RepeatedMask, Offset, 2)) {
return DAG.getUNDEF(MaskVT);
} else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) {
S0 = (SM_SentinelUndef == M0 ? -1 : 0);
S1 = (SM_SentinelUndef == M1 ? -1 : 1);
return getZeroVector(MaskVT, Subtarget, DAG, DL);
} else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) {
S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
return V1;
} else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) {
S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
return V2;
}
return SDValue();
};
int ShufMask[4] = {-1, -1, -1, -1};
SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
if (Lo && Hi) {
V1 = Lo;
V2 = Hi;
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
PermuteImm = getV4X86ShuffleImm(ShufMask);
return true;
}
}
}
return false;
}
/// \brief Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
/// This is the leaf of the recursive combine below. When we have found some
/// chain of single-use x86 shuffle instructions and accumulated the combined
/// shuffle mask represented by them, this will try to pattern match that mask
/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
// Find the inputs that enter the chain. Note that multiple uses are OK
// here; we're not going to remove the operands we find.
bool UnaryShuffle = (Inputs.size() == 1);
SDValue V1 = peekThroughBitcasts(Inputs[0]);
SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
: peekThroughBitcasts(Inputs[1]));
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
MVT RootVT = Root.getSimpleValueType();
assert(VT1.getSizeInBits() == RootVT.getSizeInBits() &&
VT2.getSizeInBits() == RootVT.getSizeInBits() &&
"Vector size mismatch");
SDLoc DL(Root);
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
if (NumBaseMaskElts == 1) {
assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
/*AddTo*/ true);
return true;
}
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are an AVX512/EVEX target and the mask element size
// is different from the root element size - this would prevent writemasks
// from being reused.
// TODO - this currently prevents all lane shuffles from occurring.
// TODO - check for writemasks usage instead of always preventing combining.
// TODO - attempt to narrow Mask back to writemask size.
bool IsEVEXShuffle =
RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
return false;
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
// Handle 128-bit lane shuffles of 256-bit vectors.
// TODO - this should support binary shuffles.
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
return false; // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
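// Each nibble of the VPERM2X128 immediate selects the source 128-bit lane
// for one destination lane; the value 0x8 zeroes that lane instead.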
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
DAG.getUNDEF(ShuffleVT),
DAG.getConstant(PermMask, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// For masks that have been widened to 128-bit elements or more,
// narrow back down to 64-bit elements.
SmallVector<int, 64> Mask;
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
scaleShuffleMask(MaskScale, BaseMask, Mask);
} else {
Mask = SmallVector<int, 64>(BaseMask.begin(), BaseMask.end());
}
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
// Determine the effective mask value type.
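// Only 32/64-bit elements have float-domain shuffles; narrower masks must
// stay in the integer domain.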
FloatDomain &= (32 <= MaskEltSizeInBits);
MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
: MVT::getIntegerVT(MaskEltSizeInBits);
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
// Only allow legal mask types.
if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
return false;
// Attempt to match the mask against known shuffle patterns.
MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
bool AllowFloatDomain = FloatDomain || (Depth > 3);
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
APInt Zeroable(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (isUndefOrZero(Mask[i]))
Zeroable.setBit(i);
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
// (zero) elements within themselves.
if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
(V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
/*AddTo*/ true);
return true;
}
}
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, Subtarget, Shuffle,
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
}
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT,
UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
return false; // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// Typically from here on, we need an integer version of MaskVT.
MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits);
IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts);
// Annoyingly, SSE4A instructions don't map into the above match helpers.
if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
uint64_t BitLen, BitIdx;
if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
return false; // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
DCI.AddToWorklist(V1.getNode());
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
return false; // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(IntMaskVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
}
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
if (Depth < 2)
return false;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
// If we have a single input lane-crossing shuffle then lower to VPERMV.
if (UnaryShuffle && (Depth >= 3 || HasVariableMask) && !MaskContainsZeros &&
((Subtarget.hasAVX2() &&
(MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
// vector as the second source.
if (UnaryShuffle && (Depth >= 3 || HasVariableMask) &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasVLX() &&
(MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
// Adjust shuffle mask - replace SM_SentinelZero with second source index.
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] == SM_SentinelZero)
Mask[i] = NumMaskElts + i;
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL);
DCI.AddToWorklist(Zero.getNode());
Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// If we have a dual input lane-crossing shuffle then lower to VPERMV3.
if ((Depth >= 3 || HasVariableMask) && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
(MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
(Subtarget.hasVLX() &&
(MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
(Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
(Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
(Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
(Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPermMask.getNode());
V1 = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(MaskVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
return false;
}
// See if we can combine a single input shuffle with zeros to a bit-mask,
// which is much simpler than any shuffle.
if (UnaryShuffle && MaskContainsZeros && (Depth >= 3 || HasVariableMask) &&
isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
UndefElts.setBit(i);
continue;
}
if (M == SM_SentinelZero)
continue;
EltBits[i] = AllOnes;
}
SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
DCI.AddToWorklist(BitMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
unsigned AndOpcode =
FloatDomain ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes, use the variable mask form of VPERMILPS.
// TODO Combine other mask types at higher depths.
if (UnaryShuffle && HasVariableMask && !MaskContainsZeros &&
((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
(MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
SmallVector<SDValue, 16> VPermIdx;
for (int M : Mask) {
SDValue Idx =
M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32);
VPermIdx.push_back(Idx);
}
SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx);
DCI.AddToWorklist(VPermMask.getNode());
Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// With XOP, binary shuffles of 128/256-bit floating point vectors can combine
// to VPERMIL2PD/VPERMIL2PS.
if ((Depth >= 3 || HasVariableMask) && Subtarget.hasXOP() &&
(MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
MaskVT == MVT::v8f32)) {
// VPERMIL2 Operation.
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
// Bits[2:0] - (Per Lane) PS Shuffle Mask.
unsigned NumLanes = MaskVT.getSizeInBits() / 128;
unsigned NumEltsPerLane = NumMaskElts / NumLanes;
SmallVector<int, 8> VPerm2Idx;
unsigned M2ZImm = 0;
for (int M : Mask) {
if (M == SM_SentinelUndef) {
VPerm2Idx.push_back(-1);
continue;
}
if (M == SM_SentinelZero) {
M2ZImm = 2;
VPerm2Idx.push_back(8);
continue;
}
int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
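// The PD selector lives in bits 2:1 (see the layout above), so 64-bit
// element indices are shifted left by one.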
Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
VPerm2Idx.push_back(Index);
}
V1 = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(MaskVT, V2);
DCI.AddToWorklist(V2.getNode());
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
DCI.AddToWorklist(VPerm2MaskOp.getNode());
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getConstant(M2ZImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// If we have 3 or more shuffle instructions or a chain involving a variable
// mask, we can replace them with a single PSHUFB instruction profitably.
// Intel's manuals suggest only using PSHUFB if doing so replaces 5
// instructions, but in practice PSHUFB tends to be *very* fast so we're
// more aggressive.
if (UnaryShuffle && (Depth >= 3 || HasVariableMask) &&
((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
(RootVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<SDValue, 16> PSHUFBMask;
int NumBytes = RootVT.getSizeInBits() / 8;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
if (M == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
if (M == SM_SentinelZero) {
PSHUFBMask.push_back(DAG.getConstant(255, DL, MVT::i8));
continue;
}
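// Widen the mask index from shuffle-element granularity to byte granularity.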
M = Ratio * M + i % Ratio;
assert ((M / 16) == (i / 16) && "Lane crossing detected");
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
Res = DAG.getBitcast(ByteVT, V1);
DCI.AddToWorklist(Res.getNode());
SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// With XOP, if we have a 128-bit binary input shuffle we can always combine
// to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
// slower than PSHUFB on targets that support both.
if ((Depth >= 3 || HasVariableMask) && RootVT.is128BitVector() &&
Subtarget.hasXOP()) {
// VPPERM Mask Operation
// Bits[4:0] - Byte Index (0 - 31)
// Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
SmallVector<SDValue, 16> VPPERMMask;
int NumBytes = 16;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
if (M == SM_SentinelUndef) {
VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
if (M == SM_SentinelZero) {
VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
continue;
}
M = Ratio * M + i % Ratio;
VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::v16i8;
V1 = DAG.getBitcast(ByteVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ByteVT, V2);
DCI.AddToWorklist(V2.getNode());
SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
DCI.AddToWorklist(VPPERMMaskOp.getNode());
Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
// Failed to find any combines.
return false;
}
// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
ArrayRef<int> Mask, SDValue Root,
bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MVT VT = Root.getSimpleValueType();
unsigned SizeInBits = VT.getSizeInBits();
unsigned NumMaskElts = Mask.size();
unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
unsigned NumOps = Ops.size();
// Extract constant bits from each source op.
bool OneUseConstantOp = false;
SmallVector<APInt, 16> UndefEltsOps(NumOps);
SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue SrcOp = Ops[i];
OneUseConstantOp |= SrcOp.hasOneUse();
if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
RawBitsOps[i]))
return false;
}
// Only fold if at least one of the constants is only used once or
// the combined shuffle has included a variable mask shuffle; this
// is to avoid constant pool bloat.
if (!OneUseConstantOp && !HasVariableMask)
return false;
// Shuffle the constant bits according to the mask.
APInt UndefElts(NumMaskElts, 0);
APInt ZeroElts(NumMaskElts, 0);
APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
APInt::getNullValue(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
UndefElts.setBit(i);
continue;
} else if (M == SM_SentinelZero) {
ZeroElts.setBit(i);
continue;
}
assert(0 <= M && M < (int)(NumMaskElts * NumOps));
unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;
auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
if (SrcUndefElts[SrcMaskIdx]) {
UndefElts.setBit(i);
continue;
}
auto &SrcEltBits = RawBitsOps[SrcOpIdx];
APInt &Bits = SrcEltBits[SrcMaskIdx];
if (!Bits) {
ZeroElts.setBit(i);
continue;
}
ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
// Create the constant data.
MVT MaskSVT;
if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
else
MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
SDLoc DL(Root);
SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
DCI.AddToWorklist(CstOp.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(VT, CstOp));
return true;
}
/// \brief Fully generic combining of x86 shuffle instructions.
///
/// This should be the last combine run over the x86 shuffle instructions. Once
/// they have been fully optimized, this will recursively consider all chains
/// of single-use shuffle instructions, build a generic model of the cumulative
/// shuffle operation, and check for simpler instructions which implement this
/// operation. We use this primarily for two purposes:
///
/// 1) Collapse generic shuffles to specialized single instructions when
/// equivalent. In most cases, this is just an encoding size win, but
/// sometimes we will collapse multiple generic shuffles into a single
/// special-purpose shuffle.
/// 2) Look for sequences of shuffle instructions with 3 or more total
/// instructions, and replace them with the slightly more expensive SSSE3
/// PSHUFB instruction if available. We do this as the last combining step
/// to ensure we avoid using PSHUFB if we can implement the shuffle with
/// a suitable short sequence of other instructions. The PSHUFB will either
/// use a register or have to read from memory and so is slightly (but only
/// slightly) more expensive than the other shuffle instructions.
///
/// Because this is inherently a quadratic operation (for each shuffle in
/// a chain, we recurse up the chain), the depth is limited to 8 instructions.
/// This should never be an issue in practice as the shuffle lowering doesn't
/// produce sequences of more than 8 instructions.
///
/// FIXME: We will currently miss some cases where the redundant shuffling
/// would simplify under the threshold for PSHUFB formation because of
/// combine-ordering. To fix this, we should do the redundant instruction
/// combining in this recursive walk.
static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask,
ArrayRef<const SDNode*> SrcNodes,
int Depth, bool HasVariableMask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
if (Depth > 8)
return false;
// Directly rip through bitcasts to find the underlying operand.
SDValue Op = SrcOps[SrcOpIndex];
Op = peekThroughOneUseBitcasts(Op);
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return false; // Bail if we hit a non-vector.
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return false;
assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
int InputIdx0 = -1, InputIdx1 = -1;
for (int i = 0, e = Ops.size(); i < e; ++i) {
SDValue BC = peekThroughBitcasts(Ops[i]);
if (Input0 && BC == peekThroughBitcasts(Input0))
InputIdx0 = i;
if (Input1 && BC == peekThroughBitcasts(Input1))
InputIdx1 = i;
}
if (Input0 && InputIdx0 < 0) {
InputIdx0 = SrcOpIndex;
Ops[SrcOpIndex] = Input0;
}
if (Input1 && InputIdx1 < 0) {
InputIdx1 = Ops.size();
Ops.push_back(Input1);
}
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
(OpMask.size() > RootMask.size() &&
OpMask.size() % RootMask.size() == 0) ||
OpMask.size() == RootMask.size()) &&
"The smaller number of elements must divide the larger.");
// This function can be performance-critical, so we rely on the power-of-2
// knowledge that we have about the mask sizes to replace div/rem ops with
// bit-masks and shifts.
assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
unsigned RootRatio = std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2);
unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2);
assert((RootRatio == 1 || OpRatio == 1) &&
"Must not have a ratio for both incoming and op masks!");
assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
SmallVector<int, 64> Mask(MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by the
// root mask to get us all the way to the root value arrangement. The reason
// for this order is that we are recursing up the operation chain.
for (unsigned i = 0; i < MaskWidth; ++i) {
unsigned RootIdx = i >> RootRatioLog2;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane; we're done.
Mask[i] = RootMask[RootIdx];
continue;
}
unsigned RootMaskedIdx =
RootRatio == 1
? RootMask[RootIdx]
: (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));
// Just insert the scaled root mask value if it references an input other
// than the SrcOp we're currently inserting.
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
(((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
Mask[i] = RootMaskedIdx;
continue;
}
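// Strip the source-operand offset (a fast modulo by the power-of-2 mask
// width) to get an index local to this op's mask.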
RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);
unsigned OpIdx = RootMaskedIdx >> OpRatioLog2;
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef; it doesn't matter which ones we
// use.
Mask[i] = OpMask[OpIdx];
continue;
}
// Ok, we have non-zero lanes, map them through to one of the Op's inputs.
unsigned OpMaskedIdx =
OpRatio == 1
? OpMask[OpIdx]
: (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
if (OpMask[OpIdx] < (int)OpMask.size()) {
assert(0 <= InputIdx0 && "Unknown target shuffle input");
OpMaskedIdx += InputIdx0 * MaskWidth;
} else {
assert(0 <= InputIdx1 && "Unknown target shuffle input");
OpMaskedIdx += InputIdx1 * MaskWidth;
}
Mask[i] = OpMaskedIdx;
}
// Handle the all undef/zero cases early.
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) {
DCI.CombineTo(Root.getNode(), DAG.getUNDEF(Root.getValueType()));
return true;
}
if (all_of(Mask, [](int Idx) { return Idx < 0; })) {
// TODO - should we handle the mixed zero/undef case as well? Just returning
// a zero mask will lose information on undef elements, possibly reducing
// future combine possibilities.
DCI.CombineTo(Root.getNode(), getZeroVector(Root.getSimpleValueType(),
Subtarget, DAG, SDLoc(Root)));
return true;
}
// Remove unused shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
assert(!Ops.empty() && "Shuffle with no inputs detected");
HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
// Update the list of shuffle nodes that have been combined so far.
SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
SrcNodes.end());
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
// shuffle). The source op should only be combined if it either has a
// single use (i.e. current Op) or all its users have already been combined.
for (int i = 0, e = Ops.size(); i < e; ++i)
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes,
Depth + 1, HasVariableMask, DAG, DCI,
Subtarget))
return true;
// Attempt to constant fold all of the constant source ops.
if (combineX86ShufflesConstants(Ops, Mask, Root, HasVariableMask, DAG, DCI,
Subtarget))
return true;
// We can only combine unary and binary shuffle mask cases.
if (Ops.size() > 2)
return false;
// Minor canonicalization of the accumulated shuffle mask to make it easier
// to match below. All this does is detect masks with sequential pairs of
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
SmallVector<int, 64> WidenedMask;
while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
Mask = std::move(WidenedMask);
}
// Canonicalization of binary shuffle masks to improve pattern matching by
// commuting the inputs.
if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(Ops[0], Ops[1]);
}
return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
DCI, Subtarget);
}
/// \brief Get the PSHUF-style mask from PSHUF node.
///
/// This is a very minor wrapper around getTargetShuffleMask to ease forming
/// v4 PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
SmallVector<SDValue, 2> Ops;
bool IsUnary;
bool HaveMask =
getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
// If we have more than 128 bits, only the low 128 bits of the shuffle mask
// matter. Check that the upper masks are repeats and remove them.
if (VT.getSizeInBits() > 128) {
int LaneElts = 128 / VT.getScalarSizeInBits();
#ifndef NDEBUG
for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
for (int j = 0; j < LaneElts; ++j)
assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&
"Mask doesn't repeat in high 128-bit lanes!");
#endif
Mask.resize(LaneElts);
}
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
case X86ISD::PSHUFLW:
Mask.resize(4);
return Mask;
case X86ISD::PSHUFHW:
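// PSHUFHW only shuffles words 4-7, so drop the identity low half and
// rebase the remaining indices to 0-3 to get a v4-style mask.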
Mask.erase(Mask.begin(), Mask.begin() + 4);
for (int &M : Mask)
M -= 4;
return Mask;
default:
llvm_unreachable("No valid shuffle instruction found!");
}
}
/// \brief Search for a combinable shuffle across a chain ending in pshufd.
///
/// We walk up the chain and look for a combinable shuffle, skipping over
/// shuffles that we could hoist this shuffle's transformation past without
/// altering anything.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
// Walk up a single-use chain looking for a combinable shuffle. Keep a stack
// of the shuffles in the chain so that we can form a fresh chain to replace
// this one.
SmallVector<SDValue, 8> Chain;
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
return SDValue(); // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
// instructions.
continue;
case X86ISD::PSHUFD:
// Found another dword shuffle.
break;
case X86ISD::PSHUFLW:
// Check that the low words (being shuffled) are the identity in the
// dword shuffle, and the high words are self-contained.
if (Mask[0] != 0 || Mask[1] != 1 ||
!(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::PSHUFHW:
// Check that the high words (being shuffled) are the identity in the
// dword shuffle, and the low words are self-contained.
if (Mask[2] != 2 || Mask[3] != 3 ||
!(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
V.getSimpleValueType().getVectorElementType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
return SDValue();
Chain.push_back(V);
V = V.getOperand(0);
do {
switch (V.getOpcode()) {
default:
return SDValue(); // Nothing to combine.
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOp)
break;
Chain.push_back(V);
LLVM_FALLTHROUGH;
case ISD::BITCAST:
V = V.getOperand(0);
continue;
}
break;
} while (V.hasOneUse());
break;
}
// Break out of the loop if we break out of the switch.
break;
}
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
return SDValue();
// Merge this node's mask and our incoming mask.
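// This composes the two shuffles: element i of the result is element
// Mask[i] of V's output, which in turn is element VMask[Mask[i]] of V's
// input.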
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Rebuild the chain around this new shuffle.
while (!Chain.empty()) {
SDValue W = Chain.pop_back_val();
if (V.getValueType() != W.getOperand(0).getValueType())
V = DAG.getBitcast(W.getOperand(0).getValueType(), V);
switch (W.getOpcode()) {
default:
llvm_unreachable("Only PSHUF and UNPCK instructions get here!");
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
break;
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
break;
}
}
if (V.getValueType() != N.getValueType())
V = DAG.getBitcast(N.getValueType(), V);
// Return the new chain to replace N.
return V;
}
/// \brief Search for a combinable shuffle across a chain ending in pshuflw or
/// pshufhw.
///
/// We walk up the chain, skipping shuffles of the other half and looking
/// through shuffles which switch halves trying to find a shuffle of the same
/// pair of dwords.
static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert(
(N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
unsigned CombineOpcode = N.getOpcode();
// Walk up a single-use chain looking for a combinable shuffle.
SDValue V = N.getOperand(0);
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
return false; // Nothing combined!
case ISD::BITCAST:
// Skip bitcasts as we always know the type for the target specific
// instructions.
continue;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
if (V.getOpcode() == CombineOpcode)
break;
// Other-half shuffles are no-ops.
continue;
}
// Break out of the loop if we break out of the switch.
break;
}
if (!V.hasOneUse())
// We fell out of the loop without finding a viable combining instruction.
return false;
// Combine away the bottom node as its shuffle will be accumulated into
// a preceding shuffle.
DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
// Record the old value.
SDValue Old = V;
// Merge this node's mask and our incoming mask (adjusted to account for all
// the pshufd instructions encountered).
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
// Check that the shuffles didn't cancel each other out. If not, we need to
// combine to the new one.
if (Old != V)
// Replace the combinable shuffle with the combined one, updating all users
// so that we re-evaluate the chain here.
DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
return true;
}
/// \brief Try to combine x86 target specific shuffles.
static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
case X86ISD::UNPCKL: {
auto Op0 = N.getOperand(0);
auto Op1 = N.getOperand(1);
unsigned Opcode0 = Op0.getOpcode();
unsigned Opcode1 = Op1.getOpcode();
// Combine X86ISD::UNPCKL with 2 X86ISD::FHADD inputs into a single
// X86ISD::FHADD. This is generated by UINT_TO_FP v2f64 scalarization.
// TODO: Add other horizontal operations as required.
if (VT == MVT::v2f64 && Opcode0 == Opcode1 && Opcode0 == X86ISD::FHADD)
return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0));
// Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
// which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
// moves the upper half elements into the lower half. For example:
//
// t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
// undef:v16i8
// t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
//
// will be combined to:
//
// t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
// This is only done for 128-bit vectors. From SSE4.1 onward this combine may
// not trigger because more capable shuffle instructions are available.
if (!VT.is128BitVector())
return SDValue();
if (Op0.isUndef() && Opcode1 == ISD::VECTOR_SHUFFLE) {
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
unsigned NumElts = VT.getVectorNumElements();
SmallVector<int, 8> ExpectedMask(NumElts, -1);
std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
NumElts / 2);
auto ShufOp = Op1.getOperand(0);
if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
}
return SDValue();
}
case X86ISD::BLENDI: {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() &&
"Unexpected input vector types");
// Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
// operands and changing the mask to 1. This saves us a bunch of
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
if (VT == MVT::v2f64)
if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
return SDValue();
}
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
SDValue V0 = peekThroughBitcasts(N->getOperand(0));
SDValue V1 = peekThroughBitcasts(N->getOperand(1));
bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode());
bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode());
if (isZero0 && isZero1)
return SDValue();
// We often lower to MOVSD/MOVSS from integer as well as native float
// types; remove unnecessary domain-crossing bitcasts if we can to make it
// easier to combine shuffles later on. We've already accounted for the
// domain switching cost when we decided to lower with it.
bool isFloat = VT.isFloatingPoint();
bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) {
MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32)
: (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32);
V0 = DAG.getBitcast(NewVT, V0);
V1 = DAG.getBitcast(NewVT, V1);
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, NewVT, V0, V1));
}
return SDValue();
}
case X86ISD::INSERTPS: {
assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
SDValue Op2 = N.getOperand(2);
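// The INSERTPS immediate encodes the source element index in bits [7:6],
// the destination element index in bits [5:4] and a zeroing mask in
// bits [3:0].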
unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
unsigned ZeroMask = InsertPSMask & 0xF;
// If we zero out all elements from Op0 then we don't need to reference it.
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
// If we zero out the element from Op1 then we don't need to reference it.
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
// Attempt to merge insertps Op1 with an inner target shuffle node.
SmallVector<int, 8> TargetMask1;
SmallVector<SDValue, 2> Ops1;
if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) {
int M = TargetMask1[SrcIdx];
if (isUndefOrZero(M)) {
// Zero/UNDEF insertion - zero out element and remove dependency.
InsertPSMask |= (1u << DstIdx);
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
// Update insertps mask srcidx and reference the source input directly.
assert(0 <= M && M < 8 && "Shuffle index out of range");
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
Op1 = Ops1[M < 4 ? 0 : 1];
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
}
// Attempt to merge insertps Op0 with an inner target shuffle node.
SmallVector<int, 8> TargetMask0;
SmallVector<SDValue, 2> Ops0;
if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0))
return SDValue();
bool Updated = false;
bool UseInput00 = false;
bool UseInput01 = false;
for (int i = 0; i != 4; ++i) {
int M = TargetMask0[i];
if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
// No change if element is already zero or the inserted element.
continue;
} else if (isUndefOrZero(M)) {
// If the target mask is undef/zero then we must zero the element.
InsertPSMask |= (1u << i);
Updated = true;
continue;
}
// The input vector element must be inline.
if (M != i && M != (i + 4))
return SDValue();
// Determine which inputs of the target shuffle we're using.
UseInput00 |= (0 <= M && M < 4);
UseInput01 |= (4 <= M);
}
// If we're not using both inputs of the target shuffle then use the
// referenced input directly.
if (UseInput00 && !UseInput01) {
Updated = true;
Op0 = Ops0[0];
} else if (!UseInput00 && UseInput01) {
Updated = true;
Op0 = Ops0[1];
}
if (Updated)
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
return SDValue();
}
default:
return SDValue();
}
// Nuke no-op shuffles that show up after combining.
if (isNoopShuffleMask(Mask))
return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
// Look for simplifications involving one or two shuffle instructions.
SDValue V = N.getOperand(0);
switch (N.getOpcode()) {
default:
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
if (makeArrayRef(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
V = DAG.getBitcast(DVT, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DL, DAG));
DCI.AddToWorklist(V.getNode());
return DAG.getBitcast(VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
// FIXME: This doesn't handle the location of the PSHUFD generically, and
// only works when we have a PSHUFD followed by two half-shuffles.
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
V.hasOneUse()) {
SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int WordMask[8];
for (int i = 0; i < 4; ++i) {
WordMask[i + NOffset] = Mask[i] + NOffset;
WordMask[i + VOffset] = VMask[i] + VOffset;
}
// Map the word mask through the DWord mask.
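// A word index W refers to the dword shuffle's output; dword W / 2 came
// from input dword DMask[W / 2], so word W came from input word
// 2 * DMask[W / 2] + (W % 2).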
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
V = DAG.getBitcast(VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
DL, VT, V, V);
}
}
}
break;
case X86ISD::PSHUFD:
if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG))
return NewN;
break;
}
return SDValue();
}
/// Returns true iff the shuffle node \p N can be replaced with an ADDSUB
/// operation. If true is returned, the operands of the ADDSUB operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
///
/// We combine shuffles to ADDSUB directly on the abstract vector shuffle nodes
/// so they are easier to match generically. We also insert dummy vector shuffle
/// nodes for the operands which explicitly discard the lanes which are unused
/// by this operation, so that the fact that they're unused flows through the
/// rest of the combiner.
static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
SDValue &Opnd0, SDValue &Opnd1) {
EVT VT = N->getValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
return false;
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
// extraction tool to support more.
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
// We require the first shuffle operand to be the FSUB node, and the second to
// be the FADD node.
if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
} else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
return false;
// If there are other uses of these operations we can't fold them.
if (!V1->hasOneUse() || !V2->hasOneUse())
return false;
// Ensure that both operations have the same operands. Note that we can
// commute the FADD operands.
SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return false;
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
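// The expected masks take even lanes from the FSUB node and odd lanes from
// the FADD node, matching the lane behaviour of ADDSUBPS/ADDSUBPD.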
if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
8, 25, 10, 27, 12, 29, 14, 31})))
return false;
Opnd0 = LHS;
Opnd1 = RHS;
return true;
}
/// \brief Try to combine a shuffle into a target-specific add-sub or
/// mul-add-sub node.
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
if (VT.is512BitVector())
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// if we can express this as a single-source shuffle, that's preferable.
static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N))
return SDValue();
EVT VT = N->getValueType(0);
// We only care about shuffles of 128/256-bit vectors of 32/64-bit values.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
if (VT.getVectorElementType() != MVT::i32 &&
VT.getVectorElementType() != MVT::i64 &&
VT.getVectorElementType() != MVT::f32 &&
VT.getVectorElementType() != MVT::f64)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Check that both sources are concats with undef.
if (N0.getOpcode() != ISD::CONCAT_VECTORS ||
N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() ||
!N1.getOperand(1).isUndef())
return SDValue();
// Construct the new shuffle mask. Elements from the first source retain their
// index, but elements from the second source no longer need to skip an undef.
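// e.g. with v4 output elements, index 4 (element 0 of the second concat)
// maps to index 2 of the combined concat built below.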
SmallVector<int, 8> Mask;
int NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
for (int Elt : SVOp->getMask())
Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));
SDLoc DL(N);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
N1.getOperand(0));
return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
}
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
// During Type Legalization, when promoting illegal vector types,
// the backend might introduce new shuffle dag nodes and bitcasts.
//
// This code performs the following transformation:
// fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
// (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
//
// We do this only if both the bitcast and the BINOP dag nodes have
// one use. Also, perform this transformation only if the new binary
// operation is legal. This is to avoid introducing dag nodes that
// potentially need to be further expanded (or custom lowered) into a
// less optimal sequence of dag nodes.
if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE &&
N->getOperand(0).getOpcode() == ISD::BITCAST &&
N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue BC0 = N0.getOperand(0);
EVT SVT = BC0.getValueType();
unsigned Opcode = BC0.getOpcode();
unsigned NumElts = VT.getVectorNumElements();
if (BC0.hasOneUse() && SVT.isVector() &&
SVT.getVectorNumElements() * 2 == NumElts &&
TLI.isOperationLegal(Opcode, VT)) {
bool CanFold = false;
switch (Opcode) {
default : break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
// isOperationLegal lies for integer ops on floating point types.
CanFold = VT.isInteger();
break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
// isOperationLegal lies for floating point ops on integer types.
CanFold = VT.isFloatingPoint();
break;
}
unsigned SVTNumElts = SVT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
CanFold = SVOp->getMaskElt(i) < 0;
if (CanFold) {
SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0));
SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1));
SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask());
}
}
}
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
Elts.push_back(Elt);
continue;
}
Elts.clear();
break;
}
if (Elts.size() == VT.getVectorNumElements())
if (SDValue LD =
EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
return LD;
// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)
// (concat_vectors t2, undef))
// Into:
// (vector_shuffle <mask> (concat_vectors t1, t2), undef)
// Since the latter can be efficiently lowered with VPERMD/VPERMQ
if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
return ShufConcat;
if (isTargetShuffle(N->getOpcode())) {
SDValue Op(N, 0);
if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
return Shuffle;
// Try recursively combining arbitrary sequences of x86 shuffle
// instructions into higher-order shuffles. We do this after combining
// specific PSHUF instruction sequences into their minimal form so that we
// can evaluate how many specialized shuffle instructions are involved in
// a particular chain.
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
}
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been custom lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
EVT EltVT = N->getValueType(0);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
EVT OriginalVT = InVec.getValueType();
// Peek through bitcasts, don't duplicate a load with other uses.
InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
if (!CurrentVT.isVector() ||
CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
SmallVector<int, 16> ShuffleMask;
SmallVector<SDValue, 2> ShuffleOps;
bool UnaryShuffle;
if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
ShuffleOps, ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
if (Idx == SM_SentinelZero)
return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
: DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
if (Idx == SM_SentinelUndef)
return DAG.getUNDEF(EltVT);
assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0]
: ShuffleOps[1];
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses =
(ShuffleOps.size() > 1 && ShuffleOps[0] == ShuffleOps[1]) ? 2 : 1;
if (LdNode.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
return SDValue();
AllowedUses = 1; // only allow 1 load use if we have a bitcast
LdNode = LdNode.getOperand(0);
}
if (!ISD::isNormalLoad(LdNode.getNode()))
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
EltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
return SDValue();
// All checks match so transform back to vector_shuffle so that DAG combiner
// can finish the job
SDLoc dl(N);
// Create shuffle node taking into account the case that it's a unary shuffle.
SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1];
Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle,
ShuffleMask);
Shuffle = DAG.getBitcast(OriginalVT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the illegal vector is scalarized on subtargets that don't have legal
// vxi1 types.
static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
const X86Subtarget &Subtarget) {
EVT VT = BitCast.getValueType();
SDValue N0 = BitCast.getOperand(0);
EVT VecVT = N0->getValueType(0);
if (!VT.isScalarInteger() || !VecVT.isSimple())
return SDValue();
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
return SDValue();
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
// v8f64. So all legal 128-bit and 256-bit vectors are covered except for
// v8i16 and v16i16.
// For these two cases, we can shuffle the upper element bytes to a
// consecutive sequence at the start of the vector and treat the results as
// v16i8 or v32i8, and for v16i8 this is the preferable solution. However,
// for v16i16 this is not the case: the shuffle is expensive, so we
// avoid sign-extending to this type entirely.
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
switch (VecVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
SExtVT = MVT::v2i64;
FPCastVT = MVT::v2f64;
break;
case MVT::v4i1:
SExtVT = MVT::v4i32;
FPCastVT = MVT::v4f32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
if (N0->getOpcode() == ISD::SETCC &&
N0->getOperand(0)->getValueType(0).is256BitVector() &&
Subtarget.hasInt256()) {
SExtVT = MVT::v4i64;
FPCastVT = MVT::v4f64;
}
break;
case MVT::v8i1:
SExtVT = MVT::v8i16;
// For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)),
// sign-extend to a 256-bit operation to match the compare.
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
// 256-bit because the shuffle is cheaper than sign extending the result of
// the compare.
if (N0->getOpcode() == ISD::SETCC &&
N0->getOperand(0)->getValueType(0).is256BitVector() &&
Subtarget.hasInt256()) {
SExtVT = MVT::v8i32;
FPCastVT = MVT::v8f32;
}
break;
case MVT::v16i1:
SExtVT = MVT::v16i8;
// For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
// it is not profitable to sign-extend to 256-bit because this will
// require an extra cross-lane shuffle which is more expensive than
// truncating the result of the compare to 128-bits.
break;
case MVT::v32i1:
// TODO: Handle pre-AVX2 cases by splitting to two v16i1's.
if (!Subtarget.hasInt256())
return SDValue();
SExtVT = MVT::v32i8;
break;
}
SDLoc DL(BitCast);
SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
if (SExtVT == MVT::v8i16) {
V = DAG.getBitcast(MVT::v16i8, V);
V = DAG.getVectorShuffle(
MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8),
{0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
} else
assert(SExtVT.getScalarType() != MVT::i16 &&
"Vectors of i16 must be shuffled");
if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
V = DAG.getBitcast(FPCastVT, V);
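// MOVMSK gathers the sign bit of each vector element into the low bits of
// a GPR; since V is a sign-extended mask, this recovers the original vXi1
// value as an integer.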
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getZExtOrTrunc(V, DL, VT);
}
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
if (DCI.isBeforeLegalize())
if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
return V;
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
// Detect bitcasts between i32 to x86mmx low word.
if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0->getOperand(0);
if (N00.getValueType() == MVT::i32)
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
}
// Detect bitcasts between element or subvector extraction to x86mmx.
if (VT == MVT::x86mmx &&
(N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) &&
isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0->getOperand(0);
if (N00.getValueType().is128BitVector())
return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,
DAG.getBitcast(MVT::v2i64, N00));
}
// Detect bitcasts from FP_TO_SINT to x86mmx.
if (VT == MVT::x86mmx && SrcVT == MVT::v2i32 &&
N0.getOpcode() == ISD::FP_TO_SINT) {
SDLoc DL(N0);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
DAG.getUNDEF(MVT::v2i32));
return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,
DAG.getBitcast(MVT::v2i64, Res));
}
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
// constant in an integer register and transferring it to an SSE register or
// transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
case ISD::AND: FPOpcode = X86ISD::FAND; break;
case ISD::OR: FPOpcode = X86ISD::FOR; break;
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64)))
return SDValue();
SDValue LogicOp0 = N0.getOperand(0);
SDValue LogicOp1 = N0.getOperand(1);
SDLoc DL0(N0);
// bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
}
// bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
!isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();
}
// Match a binop + shuffle pyramid that represents a horizontal reduction over
// the elements of a vector.
// Returns the vector that is being reduced on, or SDValue() if a reduction
// was not matched.
static SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType BinOp) {
// The pattern must end in an extract from index 0.
if ((Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ||
!isNullConstant(Extract->getOperand(1)))
return SDValue();
unsigned Stages =
Log2_32(Extract->getOperand(0).getValueType().getVectorNumElements());
SDValue Op = Extract->getOperand(0);
// At each stage, we're looking for something that looks like:
// %s = shufflevector <8 x i32> %op, <8 x i32> undef,
// <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
// i32 undef, i32 undef, i32 undef, i32 undef>
// %a = binop <8 x i32> %op, %s
// Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
// we expect something like:
// <4,5,6,7,u,u,u,u>
// <2,3,u,u,u,u,u,u>
// <1,u,u,u,u,u,u,u>
for (unsigned i = 0; i < Stages; ++i) {
if (Op.getOpcode() != BinOp)
return SDValue();
ShuffleVectorSDNode *Shuffle =
dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0).getNode());
if (Shuffle) {
Op = Op.getOperand(1);
} else {
Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1).getNode());
Op = Op.getOperand(0);
}
// The first operand of the shuffle should be the same as the other operand
// of the add.
if (!Shuffle || (Shuffle->getOperand(0) != Op))
return SDValue();
// Verify the shuffle has the expected (at this stage of the pyramid) mask.
for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
return SDValue();
}
return Op;
}
// Given a select, detect the following pattern:
// 1: %2 = zext <N x i8> %0 to <N x i32>
// 2: %3 = zext <N x i8> %1 to <N x i32>
// 3: %4 = sub nsw <N x i32> %2, %3
// 4: %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
// 5: %6 = sub nsw <N x i32> zeroinitializer, %4
// 6: %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
// This is useful as it is the input into a SAD pattern.
static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
SDValue &Op1) {
// Check the condition of the select instruction is greater-than.
SDValue SetCC = Select->getOperand(0);
if (SetCC.getOpcode() != ISD::SETCC)
return false;
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
if (CC != ISD::SETGT && CC != ISD::SETLT)
return false;
SDValue SelectOp1 = Select->getOperand(1);
SDValue SelectOp2 = Select->getOperand(2);
// The following instructions assume SelectOp1 is the subtraction operand
// and SelectOp2 is the negation operand.
// In the case of SETLT this is the other way around.
if (CC == ISD::SETLT)
std::swap(SelectOp1, SelectOp2);
// The second operand of the select should be the negation of the first
// operand, which is implemented as 0 - SelectOp1.
if (!(SelectOp2.getOpcode() == ISD::SUB &&
ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
SelectOp2.getOperand(1) == SelectOp1))
return false;
// The first operand of SetCC is the first operand of the select, which is the
// difference between the two input vectors.
if (SetCC.getOperand(0) != SelectOp1)
return false;
// In the SETLT case, the second operand of the comparison can be either 1 or 0.
APInt SplatVal;
if ((CC == ISD::SETLT) &&
- !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
- SplatVal == 1) ||
+ !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal,
+ /*AllowShrink*/false) &&
+ SplatVal.isOneValue()) ||
(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
return false;
// In the SETGT case, the second operand of the comparison can be either -1 or 0.
if ((CC == ISD::SETGT) &&
!(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
return false;
// The first operand of the select is the difference between the two input
// vectors.
if (SelectOp1.getOpcode() != ISD::SUB)
return false;
Op0 = SelectOp1.getOperand(0);
Op1 = SelectOp1.getOperand(1);
// Check if the operands of the sub are zero-extended from vectors of i8.
if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
Op1.getOpcode() != ISD::ZERO_EXTEND ||
Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
return false;
return true;
}
// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
// to these zexts.
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
const SDValue &Zext1, const SDLoc &DL) {
// Find the appropriate width for the PSADBW.
EVT InVT = Zext0.getOperand(0).getValueType();
unsigned RegSize = std::max(128u, InVT.getSizeInBits());
// "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
// fill in the missing vector elements with 0.
unsigned NumConcat = RegSize / InVT.getSizeInBits();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
Ops[0] = Zext0.getOperand(0);
MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
Ops[0] = Zext1.getOperand(0);
SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
// Actually build the SAD
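// PSADBW sums the absolute differences of each group of eight bytes and
// zero-extends each 16-bit sum into the corresponding i64 lane.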
MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
static SDValue combineHorizontalPredicateResult(SDNode *Extract,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Bail without SSE2 or with AVX512VL (which uses predicate registers).
if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
unsigned BitWidth = ExtractVT.getSizeInBits();
if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
ExtractVT != MVT::i8)
return SDValue();
// Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
for (ISD::NodeType Op : {ISD::OR, ISD::AND}) {
SDValue Match = matchBinOpReduction(Extract, Op);
if (!Match)
continue;
// EXTRACT_VECTOR_ELT can require implicit extension of the vector element
// which we can't support here for now.
if (Match.getScalarValueSizeInBits() != BitWidth)
continue;
// We require AVX2 for PMOVMSKB for v16i16/v32i8.
unsigned MatchSizeInBits = Match.getValueSizeInBits();
if (!(MatchSizeInBits == 128 ||
(MatchSizeInBits == 256 &&
((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
return SDValue();
// Don't bother performing this for 2-element vectors.
if (Match.getValueType().getVectorNumElements() <= 2)
return SDValue();
// Check that we are extracting a reduction of all sign bits.
if (DAG.ComputeNumSignBits(Match) != BitWidth)
return SDValue();
// For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
MVT MaskVT;
if (64 == BitWidth || 32 == BitWidth)
MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
MatchSizeInBits / BitWidth);
else
MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
APInt CompareBits;
ISD::CondCode CondCode;
if (Op == ISD::OR) {
// any_of -> MOVMSK != 0
CompareBits = APInt::getNullValue(32);
CondCode = ISD::CondCode::SETNE;
} else {
// all_of -> MOVMSK == ((1 << NumElts) - 1)
CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
CondCode = ISD::CondCode::SETEQ;
}
// Perform the select as i32/i64 and then truncate to avoid partial register
// stalls.
unsigned ResWidth = std::max(BitWidth, 32u);
EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
SDLoc DL(Extract);
SDValue Zero = DAG.getConstant(0, DL, ResVT);
SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
SDValue Res = DAG.getBitcast(MaskVT, Match);
Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
Ones, Zero, CondCode);
return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
}
return SDValue();
}
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// PSADBW is only supported on SSE2 and up.
if (!Subtarget.hasSSE2())
return SDValue();
// Verify the type we're extracting from is any integer type above i16.
EVT VT = Extract->getOperand(0).getValueType();
if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
return SDValue();
unsigned RegSize = 128;
if (Subtarget.hasBWI())
RegSize = 512;
else if (Subtarget.hasAVX2())
RegSize = 256;
// We handle up to v16i* for SSE2 / v32i* for AVX2 / v64i* for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
if (RegSize / VT.getVectorNumElements() < 8)
return SDValue();
// Match shuffle + add pyramid.
SDValue Root = matchBinOpReduction(Extract, ISD::ADD);
// The operand is expected to be zero extended from i8
// (verified in detectZextAbsDiff).
// In order to convert to i64 and above, additional any/zero/sign
// extend is expected.
// The zero extend from 32 bits has no mathematical effect on the result.
// Also, a sign extend is effectively a zero extend here
// (it extends the sign bit, which is zero).
// So it is correct to skip the sign/zero extend instruction.
if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
Root.getOpcode() == ISD::ZERO_EXTEND ||
Root.getOpcode() == ISD::ANY_EXTEND))
Root = Root.getOperand(0);
// If there was a match, we want Root to be a select that is the root of an
// abs-diff pattern.
if (!Root || (Root.getOpcode() != ISD::VSELECT))
return SDValue();
// Check whether we have an abs-diff pattern feeding into the select.
SDValue Zext0, Zext1;
if (!detectZextAbsDiff(Root, Zext0, Zext1))
return SDValue();
// Create the SAD instruction.
SDLoc DL(Extract);
SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL);
// If the original vector was wider than 8 elements, sum over the results
// in the SAD vector.
unsigned Stages = Log2_32(VT.getVectorNumElements());
MVT SadVT = SAD.getSimpleValueType();
if (Stages > 3) {
unsigned SadElems = SadVT.getVectorNumElements();
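// Each i64 lane of the PSADBW result holds the SAD of one group of eight
// bytes, so halve the number of live lanes with a shuffle+add at each step
// until lane 0 holds the full sum.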
for (unsigned i = Stages - 3; i > 0; --i) {
SmallVector<int, 16> Mask(SadElems, -1);
for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
Mask[j] = MaskEnd + j;
SDValue Shuffle =
DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask);
SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle);
}
}
MVT Type = Extract->getSimpleValueType(0);
unsigned TypeSizeInBits = Type.getSizeInBits();
// Return the lowest TypeSizeInBits bits.
MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits);
SAD = DAG.getNode(ISD::BITCAST, DL, ResVT, SAD);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD,
Extract->getOperand(1));
}
// Attempt to peek through a target shuffle and extract the scalar from the
// source.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue Src = N->getOperand(0);
SDValue Idx = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = SrcVT.getVectorNumElements();
// Don't attempt this for boolean mask vectors or unknown extraction indices.
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
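// e.g. a 4-element mask over an 8-element source is scaled by 2, so each
// mask entry expands to two entries covering adjacent source elements.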
if (Mask.size() != NumSrcElts) {
if ((NumSrcElts % Mask.size()) == 0) {
SmallVector<int, 16> ScaledMask;
int Scale = NumSrcElts / Mask.size();
scaleShuffleMask(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
Mask = std::move(WidenedMask);
// TODO - investigate support for wider shuffle masks with known upper
// undef/zero elements for implicit zero-extension.
}
}
// Check if narrowing/widening failed.
if (Mask.size() != NumSrcElts)
return SDValue();
int SrcIdx = Mask[N->getConstantOperandVal(1)];
SDLoc dl(N);
// If the shuffle source element is undef/zero then we can just accept it.
if (SrcIdx == SM_SentinelUndef)
return DAG.getUNDEF(VT);
if (SrcIdx == SM_SentinelZero)
return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
: DAG.getConstant(0, dl, VT);
SDValue SrcOp = Ops[SrcIdx / Mask.size()];
SrcOp = DAG.getBitcast(SrcVT, SrcOp);
SrcIdx = SrcIdx % Mask.size();
// We can only extract other elements from 128-bit vectors and in certain
// circumstances, depending on SSE-level.
// TODO: Investigate using extract_subvector for larger vectors.
// TODO: Investigate float/double extraction if it will be just stored.
if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) &&
((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
assert(SrcSVT == VT && "Unexpected extraction type");
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp,
DAG.getIntPtrConstant(SrcIdx, dl));
}
if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
(SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) {
assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
"Unexpected extraction type");
unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
DAG.getIntPtrConstant(SrcIdx, dl));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, ExtOp,
DAG.getValueType(SrcSVT));
return DAG.getZExtOrTrunc(Assert, dl, VT);
}
return SDValue();
}
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
/// scalars back, while for x64 we should use 64-bit extracts and shifts.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
return NewOp;
SDValue InputVector = N->getOperand(0);
SDValue EltIdx = N->getOperand(1);
EVT SrcVT = InputVector.getValueType();
EVT VT = N->getValueType(0);
SDLoc dl(InputVector);
// Detect mmx extraction of all bits as an i64. It works better as a bitcast.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
return DAG.getBitcast(VT, InputVector);
}
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
}
if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST &&
isa<ConstantSDNode>(EltIdx) &&
isa<ConstantSDNode>(InputVector.getOperand(0))) {
uint64_t ExtractedElt = N->getConstantOperandVal(1);
uint64_t InputValue = InputVector.getConstantOperandVal(0);
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
// Check whether this extract is the root of a sum of absolute differences
// pattern. This has to be done here because we really want it to happen
// pre-legalization.
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;
// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
return Cmp;
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (SrcVT != MVT::v4i32)
return SDValue();
// Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
// single use which is a sign-extend or zero-extend, and all elements are
// used.
SmallVector<SDNode *, 4> Uses;
unsigned ExtractedElements = 0;
for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
if (UI.getUse().getResNo() != InputVector.getResNo())
return SDValue();
SDNode *Extract = *UI;
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
if (Extract->getValueType(0) != MVT::i32)
return SDValue();
if (!Extract->hasOneUse())
return SDValue();
if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
if (!isa<ConstantSDNode>(Extract->getOperand(1)))
return SDValue();
// Record which element was extracted.
ExtractedElements |= 1 << Extract->getConstantOperandVal(1);
Uses.push_back(Extract);
}
// If not all the elements were used, this may not be worthwhile.
if (ExtractedElements != 15)
return SDValue();
// Ok, we've now decided to do the transformation.
// If 64-bit shifts are legal, use the extract-shift sequence,
// otherwise bounce the vector off the cache.
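// With legal 64-bit shifts, bitcast the v4i32 to v2i64: each i64 half
// yields two i32 elements, the low one by truncation and the high one by
// an arithmetic shift right by 32 followed by truncation.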
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vals[4];
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector);
auto &DL = DAG.getDataLayout();
EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL);
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(0, dl, VecIdxTy));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(1, dl, VecIdxTy));
SDValue ShAmt = DAG.getConstant(
32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL));
Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt));
Vals[2] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, TopHalf);
Vals[3] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt));
} else {
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(SrcVT);
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
MachinePointerInfo());
EVT ElementType = SrcVT.getVectorElementType();
unsigned EltSize = ElementType.getSizeInBits() / 8;
// Replace each use (extract) with a load of the appropriate element.
for (unsigned i = 0; i < 4; ++i) {
uint64_t Offset = EltSize * i;
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT);
SDValue ScalarAddr =
DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal);
// Load the scalar.
Vals[i] =
DAG.getLoad(ElementType, dl, Ch, ScalarAddr, MachinePointerInfo());
}
}
// Replace the extracts
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
uint64_t IdxVal = Extract->getConstantOperandVal(1);
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}
// The replacement was made in place; don't return anything.
return SDValue();
}
// TODO - merge with combineExtractVectorElt once it can handle the implicit
// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in:
// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
// combineBasicSADPattern.
static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
return combineExtractWithShuffle(N, DAG, DCI, Subtarget);
}
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
assert(CondVT.isVector() && "Vector select expects a vector selector!");
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
// Check if the first operand is all zeros and Cond type is vXi1.
// This situation only applies to AVX512.
if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
CondVT.getVectorElementType() == MVT::i1) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
SDValue CondNew = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
DAG.getAllOnesConstant(DL, CondVT));
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
// To use the condition operand as a bitwise mask, it must have elements that
// are the same size as the select elements. I.e., the condition operand must
// have already been promoted from the IR select condition type <N x i1>.
// Don't check if the types themselves are equal because that excludes
// vector floating-point selects.
if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// Try to invert the condition if true value is not all 1s and false value is
// not all 0s.
if (!TValIsAllOnes && !FValIsAllZeros &&
// Check if the selector will be produced by CMPP*/PCMP*.
Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted.
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
if (TValIsAllZeros || FValIsAllOnes) {
SDValue CC = Cond.getOperand(2);
ISD::CondCode NewCC =
ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
Cond.getOperand(0).getValueType().isInteger());
Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
NewCC);
std::swap(LHS, RHS);
TValIsAllOnes = FValIsAllOnes;
FValIsAllZeros = TValIsAllZeros;
}
}
// vselect Cond, 111..., 000... -> Cond
if (TValIsAllOnes && FValIsAllZeros)
return DAG.getBitcast(VT, Cond);
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(CondVT))
return SDValue();
// vselect Cond, 111..., X -> or Cond, X
if (TValIsAllOnes) {
SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
return DAG.getBitcast(VT, Or);
}
// vselect Cond, X, 000... -> and Cond, X
if (FValIsAllZeros) {
SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
return DAG.getBitcast(VT, And);
}
return SDValue();
}
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
SDLoc DL(N);
auto *TrueC = dyn_cast<ConstantSDNode>(LHS);
auto *FalseC = dyn_cast<ConstantSDNode>(RHS);
if (!TrueC || !FalseC)
return SDValue();
// Don't do this for crazy integer types.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType()))
return SDValue();
// If the condition is efficiently invertible, canonicalize the TrueC/FalseC
// values so that TrueC (the true value) is larger than FalseC.
bool NeedsCondInvert = false;
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
// Efficiently invertible.
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
(Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
isa<ConstantSDNode>(Cond.getOperand(1))))) {
NeedsCondInvert = true;
std::swap(TrueC, FalseC);
}
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
DAG.getConstant(ShAmt, DL, MVT::i8));
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32)
Diff = (unsigned)Diff;
bool IsFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default:
break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
IsFastMultiplier = true;
break;
}
}
if (IsFastMultiplier) {
APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
if (NeedsCondInvert) // Invert the condition if needed.
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, DL, Cond.getValueType()));
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, DL, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
}
return SDValue();
}
// If this is a bitcasted op that can be represented as another type, push
// the bitcast to the inputs. This allows more opportunities for pattern
// matching masked instructions. This is called when we know that the operation
// is used as one of the inputs of a vselect.
static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// Make sure we have a bitcast.
if (OrigOp.getOpcode() != ISD::BITCAST)
return false;
SDValue Op = OrigOp.getOperand(0);
// If the operation is used by anything other than the bitcast, we shouldn't
// do this combine as that would replicate the operation.
if (!Op.hasOneUse())
return false;
MVT VT = OrigOp.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc DL(Op.getNode());
auto BitcastAndCombineShuffle = [&](unsigned Opcode, SDValue Op0, SDValue Op1,
SDValue Op2) {
Op0 = DAG.getBitcast(VT, Op0);
DCI.AddToWorklist(Op0.getNode());
Op1 = DAG.getBitcast(VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1, Op2));
return true;
};
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::PALIGNR:
// PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
if (!VT.is128BitVector())
return false;
Opcode = X86ISD::VALIGN;
LLVM_FALLTHROUGH;
case X86ISD::VALIGN: {
if (EltVT != MVT::i32 && EltVT != MVT::i64)
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
// Make sure we can represent the same shift with the new VT.
if ((ShiftAmt % EltSize) != 0)
return false;
Imm = ShiftAmt / EltSize;
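// e.g. a v16i8 PALIGNR by 8 bytes rebased onto a v4i32 VALIGND: ShiftAmt is
// 8 * 8 == 64 bits, so the element-granular immediate becomes 64 / 32 == 2.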
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
DAG.getConstant(Imm, DL, MVT::i8));
}
case X86ISD::SHUF128: {
if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64)
return false;
// Only change element size, not type.
if (VT.isInteger() != Op.getSimpleValueType().isInteger())
return false;
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
case ISD::INSERT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
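// e.g. an insertion at element 8 of a v16i16 view becomes an insertion at
// element (8 * 16) / 32 == 4 in the v8i32 view.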
SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
// Op1 needs to be bitcasted to a smaller vector with the same element type.
SDValue Op1 = Op.getOperand(1);
MVT Op1VT = MVT::getVectorVT(EltVT,
Op1.getSimpleValueType().getSizeInBits() / EltSize);
Op1 = DAG.getBitcast(Op1VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
DAG.getIntPtrConstant(Imm, DL)));
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
// Op0 needs to be bitcasted to a larger vector with the same element type.
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = MVT::getVectorVT(EltVT,
Op0.getSimpleValueType().getSizeInBits() / EltSize);
Op0 = DAG.getBitcast(Op0VT, Op0);
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0,
DAG.getIntPtrConstant(Imm, DL)));
return true;
}
case X86ISD::SUBV_BROADCAST: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
// Only change element size, not type.
if (VT.isInteger() != Op.getSimpleValueType().isInteger())
return false;
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = MVT::getVectorVT(EltVT,
Op0.getSimpleValueType().getSizeInBits() / EltSize);
Op0 = DAG.getBitcast(Op0VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0));
return true;
}
}
return false;
}
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && VT != MVT::f128 &&
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget.hasSSE2() ||
(Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
// Converting this to a min would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
// Converting this to a max would handle both negative zeros and NaNs
// incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
}
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use an AVX instruction.
// The same applies to all 128- and 256-bit vectors of i8 and i16.
// Starting with SKX these selects have a proper lowering.
if (Subtarget.hasAVX512() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
(VT.is128BitVector() || VT.is256BitVector()) &&
(VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) &&
!(Subtarget.hasBWI() && Subtarget.hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
}
if (SDValue V = combineSelectOfTwoConstants(N, DAG))
return V;
// Canonicalize max and min:
// (x > y) ? x : y -> (x >= y) ? x : y
// (x < y) ? x : y -> (x <= y) ? x : y
// This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
// the need for an extra compare against zero. e.g.
// (x - y) > 0 ? (x - y) : 0 -> (x - y) >= 0 ? (x - y) : 0
// subl %esi, %edi
// testl %edi, %edi
// movl $0, %eax
// cmovgl %edi, %eax
// =>
// xorl %eax, %eax
// subl %esi, %edi
// cmovsl %eax, %edi
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGT: {
ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
Cond.getOperand(0), Cond.getOperand(1), NewCC);
return DAG.getSelect(DL, VT, Cond, LHS, RHS);
}
}
}
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();
// Match VSELECTs into subs with unsigned saturation.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
// left side invert the predicate to simplify logic below.
SDValue Other;
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
Other = RHS;
CC = ISD::getSetCCInverse(CC, true);
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
Other = LHS;
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS))
if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode())
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x + (-C) : 0 --> subus x, C
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
CondRHSConst->getAPIntValue() ==
(-OpRHSConst->getAPIntValue() - 1))
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT));
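// e.g. with C == 32: (x >u 31 ? x + (-32) : 0) becomes (subus x, 32),
// since CondRHSConst == -(-32) - 1 == 31.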
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignMask())
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT));
}
}
}
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
return V;
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
// condition so that blends can use the high (sign) bit of each element and
// use SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getScalarValueSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
return SDValue();
// We can only handle the cases where VSELECT is directly legal on the
// subtarget. We custom lower VSELECT nodes with constant conditions and
// this makes it hard to see whether a dynamic VSELECT will correctly
// lower, so we both check the operation's status and explicitly handle the
// cases where a *dynamic* blend will fail even though a constant-condition
// blend could be custom lowered.
// FIXME: We should find a better way to handle this class of problems.
// Potentially, we should combine constant-condition vselect nodes
// pre-legalization into shuffles and not mark as many types as custom
// lowered.
if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
// FIXME: We don't support i16-element blends currently. We could and
// should support them by making *all* the bits in the condition be set
// rather than just the high bit and using an i8-element blend.
if (VT.getVectorElementType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
if (VT.is128BitVector() && !Subtarget.hasSSE41())
return SDValue();
// Byte blends are only available in AVX2
if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
return SDValue();
+ // There are no 512-bit blend instructions that use sign bits.
+ if (VT.is512BitVector())
+ return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
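// Variable blend instructions test only the sign bit of each condition
// element, so demand just that bit and let SimplifyDemandedBits strip any
// computation feeding the remaining bits.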
APInt DemandedMask(APInt::getSignMask(BitWidth));
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
// If we changed the computation somewhere in the DAG, this change will
// affect all users of Cond. Make sure it is fine and update all the nodes
// so that we do not use the generic VSELECT anymore. Otherwise, we may
// perform wrong optimizations as we messed with the actual expectation
// for the vector boolean values.
if (Cond != TLO.Old) {
// Check all uses of the condition operand to check whether it will be
// consumed by non-BLEND instructions. Those may require that all bits
// are set properly.
for (SDNode *U : Cond->uses()) {
// TODO: Add other opcodes eventually lowered into BLEND.
if (U->getOpcode() != ISD::VSELECT)
return SDValue();
}
// Update all users of the condition before committing the change, so
// that the VSELECT optimizations that expect the correct vector boolean
// value will not be triggered.
for (SDNode *U : Cond->uses()) {
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
U->getValueType(0), Cond, U->getOperand(1),
U->getOperand(2));
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
}
DCI.CommitTargetLoweringOpt(TLO);
return SDValue();
}
// Only Cond (rather than other nodes in the computation chain) was
// changed. Change the condition just for N to keep the opportunity to
// optimize all other users their own way.
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
return SDValue();
}
}
// Look for vselects with LHS/RHS being bitcasted from an operation that
// can be executed on another type. Push the bitcast to the inputs of
// the operation. This exposes opportunities for using masking instructions.
if (N->getOpcode() == ISD::VSELECT && DCI.isAfterLegalizeVectorOps() &&
CondVT.getVectorElementType() == MVT::i1) {
if (combineBitcastForMaskedOp(LHS, DAG, DCI))
return SDValue(N, 0);
if (combineBitcastForMaskedOp(RHS, DAG, DCI))
return SDValue(N, 0);
}
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
RHS = DAG.getBitcast(MVT::i64, RHS);
SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
return DAG.getBitcast(VT, newSelect);
}
return SDValue();
}
/// Combine:
/// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
/// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
SelectionDAG &DAG) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
// Can't replace the cmp if it has more uses than the one we're looking at.
// FIXME: We would like to be able to handle this, but would need to make sure
// all uses were updated.
if (!Cmp.hasOneUse())
return SDValue();
// This only applies to variations of the common case:
// (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
// (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
// (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
// (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
// Using the proper condition codes (see below), overflow is accounted for.
// FIXME: We can generalize both constraints:
// - XOR/OR/AND (if they were made to survive AtomicExpand)
// - LHS != 1
// if the result is compared.
SDValue CmpLHS = Cmp.getOperand(0);
SDValue CmpRHS = Cmp.getOperand(1);
if (!CmpLHS.hasOneUse())
return SDValue();
auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
return SDValue();
const unsigned Opc = CmpLHS.getOpcode();
if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
return SDValue();
SDValue OpRHS = CmpLHS.getOperand(2);
auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
if (!OpRHSC)
return SDValue();
APInt Addend = OpRHSC->getAPIntValue();
if (Opc == ISD::ATOMIC_LOAD_SUB)
Addend = -Addend;
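// The CMP tested the value loaded *before* the atomic update, while the
// LOCKed instruction sets EFLAGS from the value *after* it, so shift the
// condition by the addend: e.g. (x <s 0) iff (x + 1 <=s 0) over ideal
// integers, and COND_LE consults OF, so a wrap at INT_MAX is still decided
// correctly.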
if (CC == X86::COND_S && Addend == 1)
CC = X86::COND_LE;
else if (CC == X86::COND_NS && Addend == 1)
CC = X86::COND_G;
else if (CC == X86::COND_G && Addend == -1)
CC = X86::COND_GE;
else if (CC == X86::COND_LE && Addend == -1)
CC = X86::COND_L;
else
return SDValue();
SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
return LockOp;
}
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
//
// Simplify the following patterns:
// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
// to (Op EFLAGS Cond)
//
// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
// to (Op EFLAGS !Cond)
//
// where Op could be BRCOND or CMOV.
//
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();
// Quit if not used as a boolean value.
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
// Check CMP operands. One of them should be 0 or 1 and the other should be
// a SetCC or extended from it.
SDValue Op1 = Cmp.getOperand(0);
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
const ConstantSDNode* C = nullptr;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
else if ((C = dyn_cast<ConstantSDNode>(Op2)))
SetCC = Op1;
else // Quit if neither operand is a constant.
return SDValue();
if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
checkAgainstTrue = true;
} else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 nor 1.
return SDValue();
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
if (isOneConstant(SetCC.getOperand(0)))
OpIdx = 1;
if (isOneConstant(SetCC.getOperand(1)))
OpIdx = 0;
if (OpIdx < 0)
break;
SetCC = SetCC.getOperand(OpIdx);
truncatedToBoolWithAnd = true;
} else
SetCC = SetCC.getOperand(0);
}
switch (SetCC.getOpcode()) {
case X86ISD::SETCC_CARRY:
// Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
// simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
// i.e. it's a comparison against true but the result of SETCC_CARRY is not
// truncated to i1 using 'and'.
if (checkAgainstTrue && !truncatedToBoolWithAnd)
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
LLVM_FALLTHROUGH;
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(1);
case X86ISD::CMOV: {
// Check whether the false/true values are canonical, i.e. 0 or 1.
ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
// Quit if true value is not a constant.
if (!TVal)
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
// Skip 'zext' or 'trunc' node.
if (Op.getOpcode() == ISD::ZERO_EXTEND ||
Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
// A special case for rdrand/rdseed, where 0 is set if the false condition
// is found.
if ((Op.getOpcode() != X86ISD::RDRAND &&
Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
bool FValIsFalse = true;
if (FVal && FVal->getZExtValue() != 0) {
if (FVal->getZExtValue() != 1)
return SDValue();
// If FVal is 1, opposite cond is needed.
needOppositeCond = !needOppositeCond;
FValIsFalse = false;
}
// Quit if TVal is not the constant opposite of FVal.
if (FValIsFalse && TVal->getZExtValue() != 1)
return SDValue();
if (!FValIsFalse && TVal->getZExtValue() != 0)
return SDValue();
CC = X86::CondCode(SetCC.getConstantOperandVal(2));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(3);
}
}
return SDValue();
}
/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
/// Match:
/// (X86or (X86setcc) (X86setcc))
/// (X86cmp (and (X86setcc) (X86setcc)), 0)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
X86::CondCode &CC1, SDValue &Flags,
bool &isAnd) {
if (Cond->getOpcode() == X86ISD::CMP) {
if (!isNullConstant(Cond->getOperand(1)))
return false;
Cond = Cond->getOperand(0);
}
isAnd = false;
SDValue SetCC0, SetCC1;
switch (Cond->getOpcode()) {
default: return false;
case ISD::AND:
case X86ISD::AND:
isAnd = true;
LLVM_FALLTHROUGH;
case ISD::OR:
case X86ISD::OR:
SetCC0 = Cond->getOperand(0);
SetCC1 = Cond->getOperand(1);
break;
}
// Make sure we have SETCC nodes, using the same flags value.
if (SetCC0.getOpcode() != X86ISD::SETCC ||
SetCC1.getOpcode() != X86ISD::SETCC ||
SetCC0->getOperand(1) != SetCC1->getOperand(1))
return false;
CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
Flags = SetCC0->getOperand(1);
return true;
}
/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG) {
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
return combineSetCCAtomicArith(EFLAGS, CC, DAG);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
// If the flag operand isn't dead, don't touch this CMOV.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
SDValue FalseOp = N->getOperand(0);
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
case X86ISD::BSR:
case X86ISD::BSF:
// If the operand of BSR / BSF is proven never zero, then ZF cannot be set.
if (DAG.isKnownNeverZero(Cond.getOperand(0)))
return (CC == X86::COND_E) ? FalseOp : TrueOp;
}
}
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of the X86
// condition codes.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
}
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
std::swap(TrueOp, FalseOp);
}
// Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, DL, MVT::i8));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: // result = add base, cond
case 2: // result = lea base( , cond*2)
case 3: // result = lea base(cond, cond*2)
case 4: // result = lea base( , cond*4)
case 5: // result = lea base(cond, cond*4)
case 8: // result = lea base( , cond*8)
case 9: // result = lea base(cond, cond*8)
isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// Scale the condition by the difference.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, DL, Cond.getValueType()));
// Add the base if non-zero.
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) // Dead flag value?
return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
}
}
}
// Handle these cases:
// (select (x != c), e, c) -> (select (x != c), e, x),
// (select (x == c), c, e) -> (select (x == c), x, e)
// where the c is an integer constant, and the "select" is the combination
// of CMOV and CMP.
//
// The rationale for this change is that a conditional-move from a constant
// needs two instructions, whereas a conditional-move from a register needs
// only one instruction.
//
// CAVEAT: By replacing a constant with a symbolic value, it may obscure
// some instruction-combining opportunities. This opt needs to be
// postponed as late as possible.
//
if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
// The DCI.xxxx conditions are provided to postpone the optimization as
// late as possible.
ConstantSDNode *CmpAgainst = nullptr;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
!isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueOp, FalseOp);
}
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = { FalseOp, Cond.getOperand(0),
DAG.getConstant(CC, DL, MVT::i8), Cond };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
}
}
// Fold and/or of setcc's to double CMOV:
// (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
// (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
//
// This combine lets us generate:
// cmovcc1 (jcc1 if we don't have CMOV)
// cmovcc2 (same)
// instead of:
// setcc1
// setcc2
// and/or
// cmovne (jne if we don't have CMOV)
// When we can't use the CMOV instruction, it might increase branch
// mispredicts.
// When we can use CMOV, or when there is no mispredict, this improves
// throughput and reduces register pressure.
//
if (CC == X86::COND_NE) {
SDValue Flags;
X86::CondCode CC0, CC1;
bool isAndSetCC;
if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
if (isAndSetCC) {
std::swap(FalseOp, TrueOp);
CC0 = X86::GetOppositeBranchCondition(CC0);
CC1 = X86::GetOppositeBranchCondition(CC1);
}
SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8),
Flags};
SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps);
SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags};
SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1));
return CMOV;
}
}
return SDValue();
}
/// Different mul shrinking modes.
enum ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
EVT VT = N->getOperand(0).getValueType();
if (VT.getScalarSizeInBits() != 32)
return false;
assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");
unsigned SignBits[2] = {1, 1};
bool IsPositive[2] = {false, false};
for (unsigned i = 0; i < 2; i++) {
SDValue Opd = N->getOperand(i);
// DAG.ComputeNumSignBits returns 1 for ISD::ANY_EXTEND, so we need to
// compute the sign bits for it separately.
if (Opd.getOpcode() == ISD::ANY_EXTEND) {
// For anyextend, it is safe to assume an appropriate number of leading
// sign/zero bits.
if (Opd.getOperand(0).getValueType().getVectorElementType() == MVT::i8)
SignBits[i] = 25;
else if (Opd.getOperand(0).getValueType().getVectorElementType() ==
MVT::i16)
SignBits[i] = 17;
else
return false;
IsPositive[i] = true;
} else if (Opd.getOpcode() == ISD::BUILD_VECTOR) {
// All the operands of BUILD_VECTOR need to be integer constants.
// Find the smallest value range which all the operands belong to.
SignBits[i] = 32;
IsPositive[i] = true;
for (const SDValue &SubOp : Opd.getNode()->op_values()) {
if (SubOp.isUndef())
continue;
auto *CN = dyn_cast<ConstantSDNode>(SubOp);
if (!CN)
return false;
APInt IntVal = CN->getAPIntValue();
if (IntVal.isNegative())
IsPositive[i] = false;
SignBits[i] = std::min(SignBits[i], IntVal.getNumSignBits());
}
} else {
SignBits[i] = DAG.ComputeNumSignBits(Opd);
if (Opd.getOpcode() == ISD::ZERO_EXTEND)
IsPositive[i] = true;
}
}
bool AllPositive = IsPositive[0] && IsPositive[1];
unsigned MinSignBits = std::min(SignBits[0], SignBits[1]);
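// Thresholds below: an i32 whose value fits in i8 has at least
// 32 - 8 + 1 == 25 known sign bits, an unsigned 8-bit range gives 24,
// i16 gives 17 and an unsigned 16-bit range gives 16.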
// When ranges are from -128 ~ 127, use MULS8 mode.
if (MinSignBits >= 25)
Mode = MULS8;
// When ranges are from 0 ~ 255, use MULU8 mode.
else if (AllPositive && MinSignBits >= 24)
Mode = MULU8;
// When ranges are from -32768 ~ 32767, use MULS16 mode.
else if (MinSignBits >= 17)
Mode = MULS16;
// When ranges are from 0 ~ 65535, use MULU16 mode.
else if (AllPositive && MinSignBits >= 16)
Mode = MULU16;
else
return false;
return true;
}
/// When the operands of a vector mul are extended from smaller size values,
/// like i8 and i16, the type of the mul may be shrunk to generate more
/// efficient code. Two typical patterns are handled:
/// Pattern1:
/// %2 = sext/zext <N x i8> %1 to <N x i32>
/// %4 = sext/zext <N x i8> %3 to <N x i32>
/// or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
/// %5 = mul <N x i32> %2, %4
///
/// Pattern2:
/// %2 = zext/sext <N x i16> %1 to <N x i32>
/// %4 = zext/sext <N x i16> %3 to <N x i32>
/// or %4 = build_vector <N x i32> %C1, ..., %CN (%C1..%CN are constants)
/// %5 = mul <N x i32> %2, %4
///
/// There are four mul shrinking modes:
/// If %2 == sext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// -128 to 127, and the scalar value range of %4 is also -128 to 127,
/// generate pmullw+sext32 for it (MULS8 mode).
/// If %2 == zext32(trunc8(%2)), i.e., the scalar value range of %2 is
/// 0 to 255, and the scalar value range of %4 is also 0 to 255,
/// generate pmullw+zext32 for it (MULU8 mode).
/// If %2 == sext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// -32768 to 32767, and the scalar value range of %4 is also -32768 to 32767,
/// generate pmullw+pmulhw for it (MULS16 mode).
/// If %2 == zext32(trunc16(%2)), i.e., the scalar value range of %2 is
/// 0 to 65535, and the scalar value range of %4 is also 0 to 65535,
/// generate pmullw+pmulhuw for it (MULU16 mode).
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Check for legality:
// pmullw/pmulhw require SSE2; they are not available with SSE1 alone.
if (!Subtarget.hasSSE2())
return SDValue();
// Check for profitability
// pmulld is supported since SSE4.1. It is better to use pmulld
// instead of pmullw+pmulhw, except for subtargets where pmulld is slower than
// the expansion.
bool OptForMinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
return SDValue();
ShrinkMode Mode;
if (!canReduceVMulWidth(N, DAG, Mode))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getOperand(0).getValueType();
unsigned RegSize = 128;
MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
EVT ReducedVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements());
// Shrink the operands of mul.
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
if (VT.getVectorNumElements() >= OpsVT.getVectorNumElements()) {
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
if (Mode == MULU8 || Mode == MULS8) {
return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
DL, VT, MulLo);
} else {
MVT ResVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
// Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16,
// the higher part is also needed.
SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
ReducedVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
// result.
// Generate shuffle functioning as punpcklwd.
SmallVector<int, 16> ShuffleMask(VT.getVectorNumElements());
for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++) {
ShuffleMask[2 * i] = i;
ShuffleMask[2 * i + 1] = i + VT.getVectorNumElements();
}
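// e.g. with 8 elements per vector the mask is <0,8,1,9,2,10,3,11>, so lane
// i of MulLo supplies the low 16 bits and lane i of MulHi the high 16 bits
// of each resulting i32 product.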
SDValue ResLo =
DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
ResLo = DAG.getNode(ISD::BITCAST, DL, ResVT, ResLo);
// Generate shuffle functioning as punpckhwd.
for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++) {
ShuffleMask[2 * i] = i + VT.getVectorNumElements() / 2;
ShuffleMask[2 * i + 1] = i + VT.getVectorNumElements() * 3 / 2;
}
SDValue ResHi =
DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
ResHi = DAG.getNode(ISD::BITCAST, DL, ResVT, ResHi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
} else {
// When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want
// to legalize the mul explicitly because implicit legalization for type
// <4 x i16> to <4 x i32> sometimes involves unnecessary unpack
// instructions which will not exist when we explicitly legalize it by
// extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with
// <4 x i16> undef).
//
// Legalize the operands of mul.
// FIXME: We may be able to handle non-concatenated vectors by insertion.
unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
if ((RegSize % ReducedSizeInBits) != 0)
return SDValue();
SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
DAG.getUNDEF(ReducedVT));
Ops[0] = NewN0;
NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
Ops[0] = NewN1;
NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
if (Mode == MULU8 || Mode == MULS8) {
// Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower
// part is needed.
SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
// Convert the type of the mul result to VT.
MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
SDValue Res = DAG.getNode(Mode == MULU8 ? ISD::ZERO_EXTEND_VECTOR_INREG
: ISD::SIGN_EXTEND_VECTOR_INREG,
DL, ResVT, Mul);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
} else {
// Generate the lower and higher part of mul: pmulhw/pmulhuw. For
// MULU16/MULS16, both parts are needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1);
SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL,
OpsVT, NewN0, NewN1);
// Repack the lower part and higher part result of mul into a wider
// result. Make sure the type of mul result is VT.
MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
SDValue Res = DAG.getNode(X86ISD::UNPCKL, DL, OpsVT, MulLo, MulHi);
Res = DAG.getNode(ISD::BITCAST, DL, ResVT, Res);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
}
}
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
EVT VT, SDLoc DL) {
auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(Mult, DL, VT));
Result = DAG.getNode(ISD::SHL, DL, VT, Result,
DAG.getConstant(Shift, DL, MVT::i8));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
N->getOperand(0));
return Result;
};
auto combineMulMulAddOrSub = [&](bool isAdd) {
SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(9, DL, VT));
Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
N->getOperand(0));
return Result;
};
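// Each case below decomposes the constant into factors that map onto LEA
// scales (x*3, x*5, x*9 are single LEAs) plus at most one extra shift or
// add/sub, which is usually cheaper than a full imul when LEA is fast.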
switch (MulAmt) {
default:
break;
case 11:
// mul x, 11 => add ((shl (mul x, 5), 1), x)
return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
case 21:
// mul x, 21 => add ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
case 22:
// mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
case 19:
// mul x, 19 => sub ((shl (mul x, 5), 2), x)
return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
case 13:
// mul x, 13 => add ((shl (mul x, 3), 2), x)
return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
case 23:
// mul x, 23 => sub ((shl (mul x, 3), 3), x)
return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
case 14:
// mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
case 26:
// mul x, 26 => sub ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(/*isAdd*/ false);
case 28:
// mul x, 28 => add ((mul (mul x, 9), 3), x)
return combineMulMulAddOrSub(/*isAdd*/ true);
case 29:
// mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
combineMulMulAddOrSub(/*isAdd*/ true));
case 30:
// mul x, 30 => sub (sub ((shl x, 5), x), x)
return DAG.getNode(
ISD::SUB, DL, VT,
DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(5, DL, MVT::i8)),
N->getOperand(0)),
N->getOperand(0));
}
return SDValue();
}
/// Optimize a single multiply with constant into two operations in order to
/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
if (!MulConstantOptimization)
return SDValue();
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
if (VT != MVT::i64 && VT != MVT::i32)
return SDValue();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
uint64_t MulAmt = C->getZExtValue();
if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
return SDValue();
uint64_t MulAmt1 = 0;
uint64_t MulAmt2 = 0;
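// Try to split the constant into two LEA-scale-friendly factors, e.g.
// 45 == 9 * 5 becomes two LEAs and 24 == 3 * 8 becomes an LEA plus a shift.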
if ((MulAmt % 9) == 0) {
MulAmt1 = 9;
MulAmt2 = MulAmt / 9;
} else if ((MulAmt % 5) == 0) {
MulAmt1 = 5;
MulAmt2 = MulAmt / 5;
} else if ((MulAmt % 3) == 0) {
MulAmt1 = 3;
MulAmt2 = MulAmt / 3;
}
SDLoc DL(N);
SDValue NewMul;
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
// If the second multiplier is pow2, issue it first. We want the multiply
// by 3, 5, or 9 to be folded into the addressing mode unless the lone use
// is an add.
std::swap(MulAmt1, MulAmt2);
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
DAG.getConstant(MulAmt1, DL, VT));
if (isPowerOf2_64(MulAmt2))
NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
} else if (!Subtarget.slowLEA())
NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
if (!NewMul) {
assert(MulAmt != 0 &&
MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
int64_t SignMulAmt = C->getSExtValue();
if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
(SignMulAmt != -INT64_MAX)) {
int NumSign = SignMulAmt > 0 ? 1 : -1;
bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
if (IsPowerOf2_64PlusOne) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
NewMul = DAG.getNode(
ISD::ADD, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
MVT::i8)));
} else if (IsPowerOf2_64MinusOne) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
NewMul = DAG.getNode(
ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
MVT::i8)),
N->getOperand(0));
}
// To negate, subtract the number from zero
if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
NewMul =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
}
}
if (NewMul)
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
return SDValue();
}
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
if (VT.isInteger() && !VT.isVector() &&
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask <<= N1C->getAPIntValue();
bool MaskOK = false;
// We can handle cases concerning bit-widening nodes containing setcc_c if
// we carefully interrogate the mask to make sure we are preserving
// semantics.
// The transform is not safe if the result of C1 << C2 exceeds the bitwidth
// of the underlying setcc_c operation if the setcc_c was zero extended.
// Consider the following example:
// zext(setcc_c) -> i32 0x0000FFFF
// c1 -> i32 0x0000FFFF
// c2 -> i32 0x00000001
// (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE
// (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = true;
} else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = true;
} else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
N00.getOpcode() == ISD::ANY_EXTEND) &&
N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
}
if (MaskOK && Mask != 0) {
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
}
}
// Hardware support for vector shifts is sparse which makes us scalarize the
// vector operations in many cases. Also, on Sandy Bridge ADD is faster
// than SHL.
// (shl V, 1) -> add V,V
if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
assert(N0.getValueType().isVector() && "Invalid vector shift type");
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
if (N1SplatC->getAPIntValue() == 1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
return SDValue();
}
static SDValue combineShiftRightAlgebraic(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Size = VT.getSizeInBits();
// fold (ashr (shl a, [56,48,32,24,16]), SarConst)
// into (shl (sext_inreg a), [56,48,32,24,16] - SarConst) or
// into (sra (sext_inreg a), SarConst - [56,48,32,24,16])
// depending on the sign of (SarConst - [56,48,32,24,16]).
// sexts on X86 are MOVs. The MOVs have the same code size
// as the above SHIFTs (only a SHIFT by 1 has lower code size).
// However the MOVs have 2 advantages over a SHIFT:
// 1. MOVs can write to a register that differs from the source.
// 2. MOVs accept memory operands.
if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
N0.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
EVT CVT = N1.getValueType();
if (SarConst.isNegative())
return SDValue();
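// e.g. with VT == i32: (sra (shl X, 24), 25) becomes
// (sra (sext_inreg X, i8), 1), while (sra (shl X, 24), 23) becomes
// (shl (sext_inreg X, i8), 1).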
for (MVT SVT : MVT::integer_valuetypes()) {
unsigned ShiftSize = SVT.getSizeInBits();
// Skip types without a corresponding sext/zext and any ShlConst
// that is not one of [56,48,32,24,16].
if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize)
continue;
SDLoc DL(N);
SDValue NN =
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
SarConst = SarConst - (Size - ShiftSize);
if (SarConst == 0)
return NN;
else if (SarConst.isNegative())
return DAG.getNode(ISD::SHL, DL, VT, NN,
DAG.getConstant(-SarConst, DL, CVT));
else
return DAG.getNode(ISD::SRA, DL, VT, NN,
DAG.getConstant(SarConst, DL, CVT));
}
return SDValue();
}
/// \brief Returns a vector of 0s if the input node is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
(!Subtarget.hasInt256() ||
(VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
SDValue Amt = N->getOperand(1);
SDLoc DL(N);
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
const APInt &ShiftAmt = AmtSplat->getAPIntValue();
unsigned MaxAmount =
VT.getSimpleVT().getScalarSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
// if the shift amount is bigger than or equal to
// the element size. The constant shift amount will be
// encoded as an 8-bit immediate.
if (ShiftAmt.trunc(8).uge(MaxAmount))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
}
return SDValue();
}
static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (N->getOpcode() == ISD::SHL)
if (SDValue V = combineShiftLeft(N, DAG))
return V;
if (N->getOpcode() == ISD::SRA)
if (SDValue V = combineShiftRightAlgebraic(N, DAG))
return V;
// Try to fold this logical shift into a zero vector.
if (N->getOpcode() != ISD::SRA)
if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
return V;
return SDValue();
}
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode ||
X86ISD::VSRLI == Opcode) &&
"Unexpected shift opcode");
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
// Out of range logical bit shifts are guaranteed to be zero.
// Out of range arithmetic bit shifts splat the sign bit.
APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
if (LogicalShift)
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
else
ShiftVal = NumBitsPerElt - 1;
}
// Shift N0 by zero -> N0.
if (!ShiftVal)
return N0;
// Shift zero -> zero.
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
// fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
// This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
// TODO - support other sra opcodes as needed.
if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
N0.getOpcode() == X86ISD::VSRAI)
return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
// We can decode 'whole byte' logical bit shifts as shuffles.
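// For example, (VSRLI v2i64 X, 8) moves each 64-bit element right by one
// whole byte, which the shuffle combiner below can express as a byte
// shuffle that shifts in zeros.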
if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
// Constant Folding.
APInt UndefElts;
SmallVector<APInt, 32> EltBits;
if (N->isOnlyUserOf(N0.getNode()) &&
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
unsigned ShiftImm = ShiftVal.getZExtValue();
for (APInt &Elt : EltBits) {
if (X86ISD::VSHLI == Opcode)
Elt <<= ShiftImm;
else if (X86ISD::VSRAI == Opcode)
Elt.ashrInPlace(ShiftImm);
else
Elt.lshrInPlace(ShiftImm);
}
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
return SDValue();
}
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(
((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) ||
(N->getOpcode() == X86ISD::PINSRW &&
N->getValueType(0) == MVT::v8i16)) &&
"Unexpected vector insertion");
// Attempt to combine PINSRB/PINSRW patterns to a shuffle.
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget);
return SDValue();
}
/// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
/// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
/// OR -> CMPNEQSS.
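/// For example, with f32 operands and no users that want EFLAGS:
///   (and (setcc COND_E, (cmp a, b)), (setcc COND_NP, (cmp a, b)))
/// becomes, roughly, the low bit of (FSETCC a, b, 0): a CMPEQSS-style mask
/// bitcast to integer, ANDed with 1 and truncated to i8.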
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned opcode;
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
SDValue CMP1 = N1->getOperand(1);
SDLoc DL(N);
// The SETCCs should both refer to the same CMP.
if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
case ISD::SELECT:
ExpectingFlags = true;
break;
case ISD::CopyToReg:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
}
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
cc0 = cc1;
cc1 = tmp;
}
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getConstant(x86cc, DL, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, DL, N->getSimpleValueType(0),
FSetCC, DAG.getIntPtrConstant(0, DL));
}
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
CMP00.getValueType(), CMP00, CMP01,
DAG.getConstant(x86cc, DL,
MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
if (is64BitFP && !Subtarget.is64Bit()) {
// On a 32-bit target, we cannot bitcast the 64-bit float to a
// 64-bit integer, since that's not a legal type. Since
// OnesOrZeroesF is all ones or all zeroes, we don't need all the
// bits, but can do this little dance to extract the lowest 32 bits
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
OnesOrZeroesF);
SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
Vector32, DAG.getIntPtrConstant(0, DL));
IntVT = MVT::i32;
}
SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, DL, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
ANDed);
return OneBitOfTruth;
}
}
}
}
return SDValue();
}
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
return SDValue();
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
if (N1.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
// register. In most cases we actually compare or select YMM-sized registers
// and mixing the two types creates horrible code. This method optimizes
// some of the transition sequences.
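// For example:
//   (zext (and (trunc (v8i32 X)), (trunc (v8i32 Y))))
// is rewritten to perform the AND directly on the wide v8i32 values and
// then mask the result, avoiding a YMM->XMM->YMM round trip.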
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.is256BitVector())
return SDValue();
assert((N->getOpcode() == ISD::ANY_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow->getValueType(0);
if (!NarrowVT.is128BitVector())
return SDValue();
if (Narrow->getOpcode() != ISD::XOR &&
Narrow->getOpcode() != ISD::AND &&
Narrow->getOpcode() != ISD::OR)
return SDValue();
SDValue N0 = Narrow->getOperand(0);
SDValue N1 = Narrow->getOperand(1);
SDLoc DL(Narrow);
// The Left side has to be a trunc.
if (N0.getOpcode() != ISD::TRUNCATE)
return SDValue();
// The type of the truncated inputs.
EVT WideVT = N0->getOperand(0)->getValueType(0);
if (WideVT != VT)
return SDValue();
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
ConstantSDNode *RHSConstSplat = nullptr;
if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
RHSConstSplat = RHSBV->getConstantSplatNode();
if (!RHSTrunc && !RHSConstSplat)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
return SDValue();
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConstSplat) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(),
SDValue(RHSConstSplat, 0));
N1 = DAG.getSplatBuildVector(WideVT, DL, N1);
} else if (RHSTrunc) {
N1 = N1->getOperand(0);
}
// Generate the wide operation.
SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
unsigned Opcode = N->getOpcode();
switch (Opcode) {
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND: {
unsigned InBits = NarrowVT.getScalarSizeInBits();
APInt Mask = APInt::getAllOnesValue(InBits);
Mask = Mask.zext(VT.getScalarSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
Op, DAG.getConstant(Mask, DL, VT));
}
case ISD::SIGN_EXTEND:
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
Op, DAG.getValueType(NarrowVT));
default:
llvm_unreachable("Unexpected opcode");
}
}
/// If both input operands of a logic op are being cast from floating point
/// types, try to convert this into a floating point logic node to avoid
/// unnecessary moves from SSE to integer registers.
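/// For example, with SSE2:
///   (xor (i64 (bitcast (f64 A))), (i64 (bitcast (f64 B))))
/// becomes (i64 (bitcast (FXOR A, B))), keeping both values in SSE registers.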
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned FPOpcode = ISD::DELETED_NODE;
if (N->getOpcode() == ISD::AND)
FPOpcode = X86ISD::FAND;
else if (N->getOpcode() == ISD::OR)
FPOpcode = X86ISD::FOR;
else if (N->getOpcode() == ISD::XOR)
FPOpcode = X86ISD::FXOR;
assert(FPOpcode != ISD::DELETED_NODE &&
"Unexpected input node for FP logic conversion");
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
((Subtarget.hasSSE1() && VT == MVT::i32) ||
(Subtarget.hasSSE2() && VT == MVT::i64))) {
SDValue N00 = N0.getOperand(0);
SDValue N10 = N1.getOperand(0);
EVT N00Type = N00.getValueType();
EVT N10Type = N10.getValueType();
if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
return DAG.getBitcast(VT, FPLogic);
}
}
return SDValue();
}
/// If this is a zero/all-bits result that is bitwise-anded with a low-bits
/// mask (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
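/// For example, if every element of X is known to be all-ones or zero, then
/// for v4i32:
///   (and X, splat(1)) -> (VSRLI X, 31)
/// which drops the load of the splat(1) constant mask.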
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
EVT VT0 = Op0.getValueType();
EVT VT1 = Op1.getValueType();
if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
return SDValue();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+ if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal,
+ /*AllowShrink*/false) ||
!SplatVal.isMask())
return SDValue();
if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
unsigned EltBitWidth = VT0.getScalarSizeInBits();
if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
return R;
if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
// Attempt to recursively combine a bitmask AND with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
// Create BEXTR instructions
// BEXTR is ((X >> imm) & (2**size-1))
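// For example, with BMI:
//   (and (srl X, 8), 0xFF) -> (BEXTR X, 0x808)
// i.e. start bit 8, length 8, encoded as (Shift | (MaskSize << 8)).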
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
if (!Subtarget.hasBMI() && !Subtarget.hasTBM())
return SDValue();
if (N0.getOpcode() != ISD::SRA && N0.getOpcode() != ISD::SRL)
return SDValue();
ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (MaskNode && ShiftNode) {
uint64_t Mask = MaskNode->getZExtValue();
uint64_t Shift = ShiftNode->getZExtValue();
if (isMask_64(Mask)) {
uint64_t MaskSize = countPopulation(Mask);
if (Shift + MaskSize <= VT.getSizeInBits())
return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
DAG.getConstant(Shift | (MaskSize << 8), DL,
VT));
}
}
return SDValue();
}
// Try to fold:
// (or (and (m, y), (pandn m, x)))
// into:
// (vselect m, x, y)
// As a special case, try to fold:
// (or (and (m, (sub 0, x)), (pandn m, x)))
// into:
// (sub (xor X, M), M)
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
(VT.is256BitVector() && Subtarget.hasInt256())))
return SDValue();
// Canonicalize AND to LHS.
if (N1.getOpcode() == ISD::AND)
std::swap(N0, N1);
// TODO: Attempt to match against AND(XOR(-1,X),Y) as well, waiting for
// ANDNP combine allows other combines to happen that prevent matching.
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP)
return SDValue();
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
if (N0.getOperand(0) == Mask)
Y = N0.getOperand(1);
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
// Check to see if the mask appeared in both the AND and ANDNP.
if (!Y.getNode())
return SDValue();
// Validate that X, Y, and Mask are bitcasts, and see through them.
Mask = peekThroughBitcasts(Mask);
X = peekThroughBitcasts(X);
Y = peekThroughBitcasts(Y);
EVT MaskVT = Mask.getValueType();
unsigned EltBits = MaskVT.getScalarSizeInBits();
// TODO: Attempt to handle floating point cases as well?
if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits)
return SDValue();
SDLoc DL(N);
// Try to match:
// (or (and (M, (sub 0, X)), (pandn M, X)))
// which is a special case of vselect:
// (vselect M, (sub 0, X), X)
// Per:
// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
// We know that, if fNegate is 0 or 1:
// (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
//
// Here, we have a mask, M (all 1s or all 0s), and, similarly, we know that:
// ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
// ( M ? -X : X) == ((X ^ M ) + (M & 1))
// This lets us transform our vselect to:
// (add (xor X, M), (and M, 1))
// And further to:
// (sub (xor X, M), M)
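// For example, with M == <-1, 0> and X == <5, 7>:
//   (xor X, M)          == <-6, 7>
//   (sub (xor X, M), M) == <-5, 7>
// which is exactly (vselect M, (sub 0, X), X).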
if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT &&
DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) {
auto IsNegV = [](SDNode *N, SDValue V) {
return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
};
SDValue V;
if (IsNegV(Y.getNode(), X))
V = X;
else if (IsNegV(X.getNode(), Y))
V = Y;
if (V) {
SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
SDValue SubOp2 = Mask;
// If the negate was on the false side of the select, then
// the operands of the SUB need to be swapped. PR 27251.
// This is because the pattern being matched above is
// (vselect M, (sub (0, X), X) -> (sub (xor X, M), M)
// but if the pattern matched was
// (vselect M, X, (sub (0, X))), that is really negation of the pattern
// above, -(vselect M, (sub 0, X), X), and therefore the replacement
// pattern also needs to be a negation of the replacement pattern above.
// And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
// sub accomplishes the negation of the replacement pattern.
if (V == Y)
std::swap(SubOp1, SubOp2);
SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
return DAG.getBitcast(VT, Res);
}
}
// PBLENDVB is only available on SSE 4.1.
if (!Subtarget.hasSSE41())
return SDValue();
MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
Mask = DAG.getBitcast(BlendVT, Mask);
Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X);
return DAG.getBitcast(VT, Mask);
}
// Helper function for combineOrCmpEqZeroToCtlzSrl
// Transforms:
// seteq(cmp x, 0)
// into:
// srl(ctlz x), log2(bitsize(x))
// Input pattern is checked by caller.
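// For example, for a 32-bit x, ctlz(x) == 32 only when x == 0, so bit 5 of
// ctlz(x) is exactly the (x == 0) predicate:
//   seteq(cmp x, 0) -> srl(ctlz x, 5)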
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SelectionDAG &DAG) {
SDValue Cmp = Op.getOperand(1);
EVT VT = Cmp.getOperand(0).getValueType();
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDLoc dl(Op);
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));
// The result of the shift is true or false, and on X86, the 32-bit
// encoding of shr and lzcnt is more desirable.
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
DAG.getConstant(Log2b, dl, VT));
return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
}
// Try to transform:
// zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0))))
// into:
// srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)))
// Will also attempt to match more generic cases, eg:
// zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0)))
// Only applies if the target supports the FastLZCNT feature.
static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
return SDValue();
auto isORCandidate = [](SDValue N) {
return (N->getOpcode() == ISD::OR && N->hasOneUse());
};
// Check that the zero extend is extending to 32 bits or more. The code generated by
// srl(ctlz) for 16-bit or less variants of the pattern would require extra
// instructions to clear the upper bits.
if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
!isORCandidate(N->getOperand(0)))
return SDValue();
// Check the node matches: setcc(eq, cmp 0)
auto isSetCCCandidate = [](SDValue N) {
return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
N->getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(N->getOperand(1).getOperand(1)) &&
N->getOperand(1).getValueType().bitsGE(MVT::i32);
};
SDNode *OR = N->getOperand(0).getNode();
SDValue LHS = OR->getOperand(0);
SDValue RHS = OR->getOperand(1);
// Save nodes matching or(or, setcc(eq, cmp 0)).
SmallVector<SDNode *, 2> ORNodes;
while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
(isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
ORNodes.push_back(OR);
OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
}
// The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)).
if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
!isORCandidate(SDValue(OR, 0)))
return SDValue();
// We have a or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern, try to lower it
// to
// or(srl(ctlz),srl(ctlz)).
// The dag combiner can then fold it into:
// srl(or(ctlz, ctlz)).
EVT VT = OR->getValueType(0);
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
SDValue Ret, NewRHS;
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
if (!Ret)
return SDValue();
// Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern.
while (ORNodes.size() > 0) {
OR = ORNodes.pop_back_val();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
EVT VT = OR->getValueType(0);
SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
if (!NewRHS)
return SDValue();
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
}
if (Ret)
Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
return Ret;
}
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
// series of shifts/or that would otherwise be generated.
// Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
// have higher latencies and we are not optimizing for size.
if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue ShAmt0 = N0.getOperand(1);
if (ShAmt0.getValueType() != MVT::i8)
return SDValue();
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
ShAmt1 = ShAmt1.getOperand(0);
SDLoc DL(N);
unsigned Opc = X86ISD::SHLD;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
if (ShAmt0.getOpcode() == ISD::SUB ||
ShAmt0.getOpcode() == ISD::XOR) {
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
}
// OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
// OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
// OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
unsigned Bits = VT.getSizeInBits();
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
Op1.getConstantOperandVal(1) == 1) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
// Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
}
}
}
}
return SDValue();
}
/// Generate NEG and CMOV for integer abs.
static SDValue combineIntegerAbs(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Since X86 does not have CMOV for 8-bit integer, we don't convert
// 8-bit integer abs to NEG and CMOV.
if (VT.isInteger() && VT.getSizeInBits() == 8)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
// Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
// and change it to SUB and CMOV.
if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) {
auto *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (Y1C && Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
// Generate SUB & CMOV.
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
SDValue Ops[] = {N0.getOperand(0), Neg,
DAG.getConstant(X86::COND_GE, DL, MVT::i8),
SDValue(Neg.getNode(), 1)};
return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
}
}
return SDValue();
}
/// Try to turn tests against the signbit in the form of:
/// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
/// into:
/// SETGT(X, -1)
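/// For example, for i32 X:
///   (xor (trunc (srl X, 31)), 1)
/// tests that the sign bit of X is clear, which is the same predicate as
/// (setgt X, -1).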
static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
// This is only worth doing if the output type is i8 or i1.
EVT ResultType = N->getValueType(0);
if (ResultType != MVT::i8 && ResultType != MVT::i1)
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// We should be performing an xor against a truncated shift.
if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
return SDValue();
// Make sure we are performing an xor against one.
if (!isOneConstant(N1))
return SDValue();
// SetCC on x86 zero extends so only act on this if it's a logical shift.
SDValue Shift = N0.getOperand(0);
if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
return SDValue();
// Make sure we are truncating from one of i16, i32 or i64.
EVT ShiftTy = Shift.getValueType();
if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
return SDValue();
// Make sure the shift amount extracts the sign bit.
if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
return SDValue();
// Create a greater-than comparison against -1.
// N.B. Using SETGE against 0 works, but we want a canonical-looking
// comparison; using SETGT matches up with what TranslateX86CC produces.
SDLoc DL(N);
SDValue ShiftOp = Shift.getOperand(0);
EVT ShiftOpTy = ShiftOp.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), ResultType);
SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp,
DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
if (SetCCResultType != ResultType)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond);
return Cond;
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// pcmpgt X, -1
///
/// This should be called before type legalization because the pattern may not
/// persist after that.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isSimple())
return SDValue();
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break;
case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break;
}
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftBV = dyn_cast<BuildVectorSDNode>(Shift.getOperand(1));
if (!ShiftBV)
return SDValue();
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
auto *ShiftAmt = ShiftBV->getConstantSplatNode();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
// Create a greater-than comparison against -1. We don't use the more obvious
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
/// Check if truncation with saturation form type \p SrcVT to \p DstVT
/// is valid for the given \p Subtarget.
static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX512())
return false;
// FIXME: Scalar type may be supported if we move it to vector register.
if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
return false;
EVT SrcElVT = SrcVT.getScalarType();
EVT DstElVT = DstVT.getScalarType();
if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
return false;
if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
return false;
if (SrcVT.is512BitVector() || Subtarget.hasVLX())
return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
return false;
}
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
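/// For example, when truncating v16i32 -> v16i8:
///   (trunc (umin X, splat(255)))
/// clamps each element to the destination range first, so X can be fed
/// directly to an unsigned-saturating truncate such as VPMOVUSDB.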
static SDValue detectUSatPattern(SDValue In, EVT VT) {
if (In.getOpcode() != ISD::UMIN)
return SDValue();
// Saturation with truncation. We truncate from InVT to VT.
assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
"Unexpected types for truncate operation");
APInt C;
- if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+ if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C,
+ /*AllowShrink*/false)) {
// C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
// the element size of the destination type.
return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
SDValue();
}
return SDValue();
}
/// Detect a pattern of truncation with saturation:
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// The types should allow use of the VPMOVUS* instructions on AVX512.
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
const X86Subtarget &Subtarget) {
if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
return SDValue();
return detectUSatPattern(In, VT);
}
static SDValue
combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
return SDValue();
if (auto USatVal = detectUSatPattern(In, VT))
if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
return SDValue();
}
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
/// X86ISD::AVG instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
const SDLoc &DL) {
if (!VT.isVector() || !VT.isSimple())
return SDValue();
EVT InVT = In.getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT ScalarVT = VT.getVectorElementType();
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
isPowerOf2_32(NumElems)))
return SDValue();
// InScalarVT is the intermediate type in the AVG pattern and it should be
// wider than the original input type (i8/i16).
EVT InScalarVT = InVT.getVectorElementType();
if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
if (Subtarget.hasBWI()) {
if (VT.getSizeInBits() > 512)
return SDValue();
} else if (Subtarget.hasAVX2()) {
if (VT.getSizeInBits() > 256)
return SDValue();
} else {
if (VT.getSizeInBits() > 128)
return SDValue();
}
// Detect the following pattern:
//
// %1 = zext <N x i8> %a to <N x i32>
// %2 = zext <N x i8> %b to <N x i32>
// %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
// %4 = add nuw nsw <N x i32> %3, %2
// %5 = lshr <N x i32> %4, <i32 1 x N>
// %6 = trunc <N x i32> %5 to <N x i8>
//
// In AVX512, the last instruction can also be a trunc store.
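// For example, with i8 elements a == 250 and b == 251:
//   (250 + 251 + 1) >> 1 == 251
// computed in the wider type, which matches PAVGB's rounding average.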
if (In.getOpcode() != ISD::SRL)
return SDValue();
// A lambda checking that the given SDValue is a constant vector and each
// element is in the range [Min, Max].
auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || !BV->isConstant())
return false;
for (SDValue Op : V->ops()) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return false;
uint64_t Val = C->getZExtValue();
if (Val < Min || Val > Max)
return false;
}
return true;
};
// Check if each element of the vector is left-shifted by one.
auto LHS = In.getOperand(0);
auto RHS = In.getOperand(1);
if (!IsConstVectorInRange(RHS, 1, 1))
return SDValue();
if (LHS.getOpcode() != ISD::ADD)
return SDValue();
// Detect a pattern of a + b + 1 where the order doesn't matter.
SDValue Operands[3];
Operands[0] = LHS.getOperand(0);
Operands[1] = LHS.getOperand(1);
// Take care of the case when one of the operands is a constant vector whose
// element is in the range [1, 256].
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
Operands[0].getOperand(0).getValueType() == VT) {
// The pattern is detected. Subtract one from the constant vector, then
// demote it and emit X86ISD::AVG instruction.
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
Operands[1]);
}
if (Operands[0].getOpcode() == ISD::ADD)
std::swap(Operands[0], Operands[1]);
else if (Operands[1].getOpcode() != ISD::ADD)
return SDValue();
Operands[2] = Operands[1].getOperand(0);
Operands[1] = Operands[1].getOperand(1);
// Now we have three operands of two additions. Check that one of them is a
// constant vector with ones, and the other two are promoted from i8/i16.
for (int i = 0; i < 3; ++i) {
if (!IsConstVectorInRange(Operands[i], 1, 1))
continue;
std::swap(Operands[i], Operands[2]);
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
Operands[j].getOperand(0).getValueType() != VT)
return SDValue();
// The pattern is detected, emit X86ISD::AVG instruction.
return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
Operands[1].getOperand(0));
}
return SDValue();
}
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
LoadSDNode *Ld = cast<LoadSDNode>(N);
EVT RegVT = Ld->getValueType(0);
EVT MemVT = Ld->getMemoryVT();
SDLoc dl(Ld);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// For chips with slow 32-byte unaligned loads, break the 32-byte operation
// into two 16-byte operations. Also split non-temporal aligned loads on
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;
unsigned AddressSpace = Ld->getAddressSpace();
unsigned Alignment = Ld->getAlignment();
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
AddressSpace, Alignment, &Fast) && !Fast))) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Ptr = Ld->getBasePtr();
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems/2);
SDValue Load1 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Alignment, Ld->getMemOperand()->getFlags());
Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
SDValue Load2 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
std::min(16U, Alignment), Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
SDValue NewVec = DAG.getUNDEF(RegVT);
NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl);
NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl);
return DCI.CombineTo(N, NewVec, TF, true);
}
return SDValue();
}
/// If V is a build vector of boolean constants and exactly one of those
/// constants is true, return the operand index of that true element.
/// Otherwise, return -1.
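/// For example, a mask of <false, true, false, false> returns 1, while a
/// mask with two (or zero) true elements returns -1.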
static int getOneTrueElt(SDValue V) {
// This needs to be a build vector of booleans.
// TODO: Checking for the i1 type matches the IR definition for the mask,
// but the mask check could be loosened to i8 or other types. That might
// also require checking more than 'allOnesValue'; eg, the x86 HW
// instructions only require that the MSB is set for each mask element.
// The ISD::MSTORE comments/definition do not specify how the mask operand
// is formatted.
auto *BV = dyn_cast<BuildVectorSDNode>(V);
if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1)
return -1;
int TrueIndex = -1;
unsigned NumElts = BV->getValueType(0).getVectorNumElements();
for (unsigned i = 0; i < NumElts; ++i) {
const SDValue &Op = BV->getOperand(i);
if (Op.isUndef())
continue;
auto *ConstNode = dyn_cast<ConstantSDNode>(Op);
if (!ConstNode)
return -1;
if (ConstNode->getAPIntValue().isAllOnesValue()) {
// If we already found a one, this is too many.
if (TrueIndex >= 0)
return -1;
TrueIndex = i;
}
}
return TrueIndex;
}
/// Given a masked memory load/store operation, return true if it has one mask
/// bit set. If it has one mask bit set, then also return the memory address of
/// the scalar element to load/store, the vector index to insert/extract that
/// scalar element, and the alignment for the scalar memory access.
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
SelectionDAG &DAG, SDValue &Addr,
SDValue &Index, unsigned &Alignment) {
int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
if (TrueMaskElt < 0)
return false;
// Get the address of the one scalar element that is specified by the mask
// using the appropriate offset from the base pointer.
EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
Addr = MaskedOp->getBasePtr();
if (TrueMaskElt != 0) {
unsigned Offset = TrueMaskElt * EltVT.getStoreSize();
Addr = DAG.getMemBasePlusOffset(Addr, Offset, SDLoc(MaskedOp));
}
Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize());
return true;
}
/// If exactly one element of the mask is set for a non-extending masked load,
/// it is a scalar load and vector insert.
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
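/// For example, a masked load of v4f32 with mask <0,0,1,0> becomes a scalar
/// load of element 2 followed by an INSERT_VECTOR_ELT into the pass-through
/// value.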
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
unsigned Alignment;
if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment))
return SDValue();
// Load the one scalar element that is specified by the mask using the
// appropriate offset from the base pointer.
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDValue Load =
DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(),
Alignment, ML->getMemOperand()->getFlags());
// Insert the loaded element into the appropriate place in the vector.
SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, ML->getSrc0(),
Load, VecIndex);
return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
return SDValue();
SDLoc DL(ML);
EVT VT = ML->getValueType(0);
// If we are loading the first and last elements of a vector, it is safe and
// always faster to load the whole vector. Replace the masked load with a
// vector load and select.
unsigned NumElts = VT.getVectorNumElements();
BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0));
bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1));
if (LoadFirstElt && LoadLastElt) {
SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMemOperand());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, ML->getSrc0());
return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
}
// Convert a masked load with a constant mask into a masked load and a select.
// This allows the select operation to use a faster kind of select instruction
// (for example, vblendvps -> vblendps).
// Don't try this if the pass-through operand is already undefined. That would
// cause an infinite loop because that's what we're about to create.
if (ML->getSrc0().isUndef())
return SDValue();
// The new masked load has an undef pass-through operand. The select uses the
// original pass-through operand.
SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
ML->getMask(), DAG.getUNDEF(VT),
ML->getMemoryVT(), ML->getMemOperand(),
ML->getExtensionType());
SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getSrc0());
return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
// TODO: Expanding load with constant mask may be optimized as well.
if (Mld->isExpandingLoad())
return SDValue();
if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
return ScalarLoad;
// TODO: Do some AVX512 subsets benefit from this transform?
if (!Subtarget.hasAVX512())
if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
return Blend;
}
if (Mld->getExtensionType() != ISD::SEXTLOAD)
return SDValue();
// Resolve extending loads.
EVT VT = Mld->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
EVT LdVT = Mld->getMemoryVT();
SDLoc dl(Mld);
assert(LdVT != VT && "Cannot extend to the same type");
unsigned ToSz = VT.getScalarSizeInBits();
unsigned FromSz = LdVT.getScalarSizeInBits();
// From/To sizes and ElemCount must be pow of two.
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for extending masked load");
unsigned SizeRatio = ToSz / FromSz;
assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
LdVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
// Convert Src0 value.
SDValue WideSrc0 = DAG.getBitcast(WideVecVT, Mld->getSrc0());
if (!Mld->getSrc0().isUndef()) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
DAG.getUNDEF(WideVecVT), ShuffleVec);
}
// Prepare the new mask.
SDValue NewMask;
SDValue Mask = Mld->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type.
NewMask = DAG.getBitcast(WideVecVT, Mask);
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
ShuffleVec[i] = NumElems * SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, dl, WideVecVT),
ShuffleVec);
} else {
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
unsigned WidenNumElts = NumElems*SizeRatio;
unsigned MaskNumElts = VT.getVectorNumElements();
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WidenNumElts);
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
Mld->getBasePtr(), NewMask, WideSrc0,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
/// If exactly one element of the mask is set for a non-truncating masked store,
/// it is a vector extract and scalar store.
/// Note: It is expected that the degenerate cases of an all-zeros or all-ones
/// mask have already been optimized in IR, so we don't bother with those here.
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
SelectionDAG &DAG) {
// TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
// However, some target hooks may need to be added to know when the transform
// is profitable. Endianness would also have to be considered.
SDValue Addr, VecIndex;
unsigned Alignment;
if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment))
return SDValue();
// Extract the one scalar element that is actually being stored.
SDLoc DL(MS);
EVT VT = MS->getValue().getValueType();
EVT EltVT = VT.getVectorElementType();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
MS->getValue(), VecIndex);
// Store that element at the appropriate offset from the base pointer.
return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(),
Alignment, MS->getMemOperand()->getFlags());
}
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
if (Mst->isCompressingStore())
return SDValue();
if (!Mst->isTruncatingStore())
return reduceMaskedStoreToScalarStore(Mst, DAG);
// Resolve truncating stores.
EVT VT = Mst->getValue().getValueType();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
// In this case we don't need any further transformations.
if (TLI.isTruncStoreLegal(VT, StVT))
return SDValue();
// From/To sizes and ElemCount must be pow of two.
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for truncating masked store");
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
assert (((NumElems * FromSz) % ToSz) == 0 &&
"Unexpected ratio for truncating masked store");
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle.
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue());
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
"WideVecVT should be legal");
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
ShuffleVec);
SDValue NewMask;
SDValue Mask = Mst->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type.
NewMask = DAG.getBitcast(WideVecVT, Mask);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
ShuffleVec[i] = NumElems*SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, dl, WideVecVT),
ShuffleVec);
} else {
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
unsigned WidenNumElts = NumElems*SizeRatio;
unsigned MaskNumElts = VT.getVectorNumElements();
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WidenNumElts);
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
Mst->getBasePtr(), NewMask, StVT,
Mst->getMemOperand(), false);
}
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
bool Fast;
unsigned AddressSpace = St->getAddressSpace();
unsigned Alignment = St->getAlignment();
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
AddressSpace, Alignment, &Fast) &&
!Fast) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl);
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl);
SDValue Ch0 =
DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
Alignment, St->getMemOperand()->getFlags());
SDValue Ch1 =
DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(),
std::min(16U, Alignment), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
if (St->isTruncatingStore() && VT.isVector()) {
// Check if we can detect an AVG pattern from the truncation. If yes,
// replace the trunc store by a normal store with the result of X86ISD::AVG
// instruction.
if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
Subtarget, dl))
return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
if (SDValue Val =
detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
dl, Val, St->getBasePtr(),
St->getMemoryVT(), St->getMemOperand(), DAG);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
// In this case we don't need any further transformations.
if (TLI.isTruncStoreLegalOrCustom(VT, StVT))
return SDValue();
// From/To sizes and ElemCount must be pow of two.
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
if (0 != (NumElems * FromSz) % ToSz) return SDValue();
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT))
return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
ShuffleVec);
// At this point all of the data is stored at the bottom of the
// register. We now need to save it to mem.
// Find the largest store unit
MVT StoreType = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
StoreType = Tp;
}
// On 32-bit systems, we can't save 64-bit integers. Try bitcasting to f64.
if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
(64 <= NumElems * ToSz))
StoreType = MVT::f64;
// Bitcast the original vector into a vector of store-size units
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Ptr = St->getBasePtr();
// Perform one or more big stores into memory.
for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
StoreType, ShuffWide,
DAG.getIntPtrConstant(i, dl));
SDValue Ch =
DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(),
St->getAlignment(), St->getMemOperand()->getFlags());
Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl);
Chains.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
// places to insert EMMS. This qualifies as a quick hack.
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
SDNode* LdVal = St->getValue().getNode();
LoadSDNode *Ld = nullptr;
int TokenFactorIndex = -1;
SmallVector<SDValue, 8> Ops;
SDNode* ChainVal = St->getChain().getNode();
// Must be a store of a load. We currently handle two cases: the load
// is a direct child, and it's under an intervening TokenFactor. It is
// possible to dig deeper under nested TokenFactors.
if (ChainVal == LdVal)
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
} else
Ops.push_back(ChainVal->getOperand(i));
}
}
if (!Ld || !ISD::isNormalLoad(Ld))
return SDValue();
// If this is not the MMX case, i.e. we are just turning i64 load/store
// into f64 load/store, avoid the transformation if there are multiple
// uses of the loaded value.
if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
return SDValue();
SDLoc LdDL(Ld);
SDLoc StDL(N);
// If we are a 64-bit capable x86, lower to a single movq load/store pair.
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget.is64Bit() || F64IsLegal) {
MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
// Make sure the new load is placed in the same chain order.
SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
if (TokenFactorIndex >= 0) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
}
// Otherwise, lower to two pairs of 32-bit loads / stores.
SDValue LoAddr = Ld->getBasePtr();
SDValue HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, LdDL);
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
MinAlign(Ld->getAlignment(), 4),
Ld->getMemOperand()->getFlags());
// Make sure the new loads are placed in the same chain order.
SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
if (TokenFactorIndex >= 0) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
LoAddr = St->getBasePtr();
HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL);
SDValue LoSt =
DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(),
St->getAlignment(), St->getMemOperand()->getFlags());
SDValue HiSt = DAG.getStore(
NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4),
MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
// This is similar to the above case, but here we handle a scalar 64-bit
// integer store that is extracted from a vector on a 32-bit target.
// If we have SSE2, then we can treat it like a floating-point double
// to get past legalization. The execution dependencies fixup pass will
// choose the optimal machine instruction for the store if this really is
// an integer or v2f32 rather than an f64.
if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue OldExtract = St->getOperand(1);
SDValue ExtOp0 = OldExtract.getOperand(0);
unsigned VecSize = ExtOp0.getValueSizeInBits();
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
BitCast, OldExtract.getOperand(1));
return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
}
return SDValue();
}
/// Return 'true' if this vector operation is "horizontal"
/// and return the operands for the horizontal operation in LHS and RHS. A
/// horizontal operation performs the binary operation on successive elements
/// of its first operand, then on successive elements of its second operand,
/// returning the resulting values in a vector. For example, if
/// A = < float a0, float a1, float a2, float a3 >
/// and
/// B = < float b0, float b1, float b2, float b3 >
/// then the result of doing a horizontal operation on A and B is
/// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
// and
// LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
// RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
// then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
// which is A horizontal-op B.
// At least one of the operands should be a vector shuffle.
if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
// Handle 128- and 256-bit vector lengths. AVX defines horizontal add/sub to
// operate independently on 128-bit lanes.
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts / NumLanes;
assert((NumLaneElts % 2 == 0) &&
"Vector type should have an even number of elements in each lane");
unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
// If LHS is not a shuffle then pretend it is the shuffle
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
// NOTE: in what follows a default-initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (!LHS.getOperand(0).isUndef())
A = LHS.getOperand(0);
if (!LHS.getOperand(1).isUndef())
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
if (!LHS.isUndef())
A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (!RHS.getOperand(0).isUndef())
C = RHS.getOperand(0);
if (!RHS.getOperand(1).isUndef())
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
if (!RHS.isUndef())
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
// Check that the shuffles are both shuffling the same vectors.
if (!(A == C && B == D) && !(A == D && B == C))
return false;
// If everything is UNDEF then bail out: it would be better to fold to UNDEF.
if (!A.getNode() && !B.getNode())
return false;
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
ShuffleVectorSDNode::commuteMask(RMask);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
int LIdx = LMask[i+l], RIdx = RMask[i+l];
// Ignore any UNDEF components.
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
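// For example (illustrative): for v4f32 there is one lane with
// HalfLaneElts == 2. Element i == 0 must read LIdx == 0 and RIdx == 1
// (a0 op a1) and i == 1 must read 2 and 3, while i == 2 and i == 3 switch
// to the second source and must read 4,5 and 6,7, matching the
// <0, 2, 4, 6> / <1, 3, 5, 7> masks shown above.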
unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
}
LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
return true;
}
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
bool IsFadd = N->getOpcode() == ISD::FADD;
assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, IsFadd)) {
auto NewOpcode = IsFadd ? X86ISD::FHADD : X86ISD::FHSUB;
return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
}
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
unsigned Opcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
unsigned TruncSizeInBits = VT.getScalarSizeInBits();
// Repeated operand, so we are only trading one output truncation for
// one input truncation.
if (Op0 == Op1)
return true;
// See if either operand has been extended from a smaller/equal size to
// the truncation size, allowing a truncation to combine with the extend.
unsigned Opcode0 = Op0.getOpcode();
if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
Opcode0 == ISD::ZERO_EXTEND) &&
Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
unsigned Opcode1 = Op1.getOpcode();
if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
Opcode1 == ISD::ZERO_EXTEND) &&
Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
return true;
// See if either operand is a single use constant which can be constant
// folded.
SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
};
// Don't combine if the operation has other uses.
if (!N->isOnlyUserOf(Src.getNode()))
return SDValue();
// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
// In most cases it's only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (Opcode) {
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
!TLI.isOperationLegal(Opcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
}
return SDValue();
}
/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
static SDValue
combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 ||
Regs[0].getValueType() == MVT::v2i64));
EVT OutVT = N->getValueType(0);
EVT OutSVT = OutVT.getVectorElementType();
EVT InVT = Regs[0].getValueType();
EVT InSVT = InVT.getVectorElementType();
SDLoc DL(N);
// First, use a mask to clear all bits that won't appear in the result.
assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) &&
"OutSVT can only be either i8 or i16.");
APInt Mask =
APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits());
SDValue MaskVal = DAG.getConstant(Mask, DL, InVT);
for (auto &Reg : Regs)
Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVal, Reg);
MVT UnpackedVT, PackedVT;
if (OutSVT == MVT::i8) {
UnpackedVT = MVT::v8i16;
PackedVT = MVT::v16i8;
} else {
UnpackedVT = MVT::v4i32;
PackedVT = MVT::v8i16;
}
// In each iteration, truncate the type by a half size.
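// For example (illustrative): truncating four v4i32 registers to v16i8
// takes two rounds (RegNum goes 4 -> 2 -> 1). The masking above already
// cleared every bit that cannot appear in an i8 result, so each
// unsigned-saturating X86ISD::PACKUS step is lossless.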
auto RegNum = Regs.size();
for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits();
j < e; j *= 2, RegNum /= 2) {
for (unsigned i = 0; i < RegNum; i++)
Regs[i] = DAG.getBitcast(UnpackedVT, Regs[i]);
for (unsigned i = 0; i < RegNum / 2; i++)
Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2],
Regs[i * 2 + 1]);
}
// If the type of the result is v8i8, we need to do one more X86ISD::PACKUS,
// and then extract a subvector as the result since v8i8 is not a legal type.
if (OutVT == MVT::v8i8) {
Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]);
Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0],
DAG.getIntPtrConstant(0, DL));
return Regs[0];
} else if (RegNum > 1) {
Regs.resize(RegNum);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
} else
return Regs[0];
}
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
static SDValue
combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
EVT OutVT = N->getValueType(0);
SDLoc DL(N);
// Shift left by 16 bits, then arithmetic-shift right by 16 bits.
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
for (auto &Reg : Regs) {
Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt,
Subtarget, DAG);
Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt,
Subtarget, DAG);
}
for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2],
Regs[i * 2 + 1]);
if (Regs.size() > 2) {
Regs.resize(Regs.size() / 2);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
} else
return Regs[0];
}
/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
/// legalization the truncation will be translated into a BUILD_VECTOR whose
/// elements are each extracted from a vector and then truncated, and it is
/// difficult to perform this optimization on that form.
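/// For example (illustrative): on SSE4.1 a v8i32 -> v8i16 truncation is split
/// into two v4i32 halves, each half is masked down to its low 16 bits, and a
/// single X86ISD::PACKUS node reassembles them as v8i16.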
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OutVT = N->getValueType(0);
if (!OutVT.isVector())
return SDValue();
SDValue In = N->getOperand(0);
if (!In.getValueType().isSimple())
return SDValue();
EVT InVT = In.getValueType();
unsigned NumElems = OutVT.getVectorNumElements();
// TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
// SSE2, and we need to take care of it specially.
// AVX512 provides vpmovdb.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX2())
return SDValue();
EVT OutSVT = OutVT.getVectorElementType();
EVT InSVT = InVT.getVectorElementType();
if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
(OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
NumElems >= 8))
return SDValue();
// SSSE3's pshufb results in fewer instructions in the cases below.
if (Subtarget.hasSSSE3() && NumElems == 8 &&
((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
(InSVT == MVT::i32 && OutSVT == MVT::i16)))
return SDValue();
SDLoc DL(N);
// Split a long vector into vectors of legal type.
unsigned RegNum = InVT.getSizeInBits() / 128;
SmallVector<SDValue, 8> SubVec(RegNum);
unsigned NumSubRegElts = 128 / InSVT.getSizeInBits();
EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts);
for (unsigned i = 0; i < RegNum; i++)
SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In,
DAG.getIntPtrConstant(i * NumSubRegElts, DL));
// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
// truncate 2 x v4i32 to v8i16.
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec);
else
return SDValue();
}
/// This function transforms vector truncation of 'all or none' bits values,
/// i.e. vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32, into X86ISD::PACKSS operations.
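/// For example (illustrative): each lane of a v8i32 vector-compare result is
/// all-ones or all-zeros, i.e. a splatted sign bit, so truncating it to v8i16
/// with X86ISD::PACKSS preserves the values exactly.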
static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// Requires SSE2 but AVX512 has fast truncate.
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
return SDValue();
if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
return SDValue();
SDValue In = N->getOperand(0);
if (!In.getValueType().isSimple())
return SDValue();
MVT VT = N->getValueType(0).getSimpleVT();
MVT SVT = VT.getScalarType();
MVT InVT = In.getValueType().getSimpleVT();
MVT InSVT = InVT.getScalarType();
// Use PACKSS if the input is a splatted sign bit.
// e.g. Comparison result, sext_in_reg, etc.
unsigned NumSignBits = DAG.ComputeNumSignBits(In);
if (NumSignBits != InSVT.getSizeInBits())
return SDValue();
// Check we have a truncation suited for PACKSS.
if (!VT.is128BitVector() && !VT.is256BitVector())
return SDValue();
if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
return SDValue();
if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
return SDValue();
return truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget);
}
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
SDLoc DL(N);
// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;
// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
// Try to combine truncation with unsigned saturation.
if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
return Val;
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
SDValue BCSrc = Src.getOperand(0);
if (BCSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}
// Try to truncate extended sign bits with PACKSS.
if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
return V;
return combineVectorTruncation(N, DAG, Subtarget);
}
/// Returns the negated value if the node \p N flips the sign of an FP value.
///
/// An FP-negation node may have different forms: FNEG(x) or FXOR(x, 0x80000000).
/// AVX512F does not have FXOR, so FNEG is lowered as
/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
/// In this case we look through all bitcasts.
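/// For example (illustrative): on AVX512F a v4f32 FNEG can reach this code as
/// (v4f32 (bitcast (xor (v4i32 (bitcast x)), (v4i32 (bitcast <-0.0 splat>)))))
/// and is recognized here, returning x as the negated value.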
static SDValue isFNEG(SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
if (Op.getOpcode() != X86ISD::FXOR && Op.getOpcode() != ISD::XOR)
return SDValue();
SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
if (!Op1.getValueType().isFloatingPoint())
return SDValue();
SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
unsigned EltBits = Op1.getScalarValueSizeInBits();
auto isSignMask = [&](const ConstantFP *C) {
return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits);
};
// There is more than one way to represent the same constant across
// different X86 targets, and the form of the node may also depend on size.
// - load scalar value and broadcast
// - BUILD_VECTOR node
// - load from a constant pool.
// We check all variants here.
if (Op1.getOpcode() == X86ISD::VBROADCAST) {
if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
if (isSignMask(cast<ConstantFP>(C)))
return Op0;
} else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
if (isSignMask(CN->getConstantFPValue()))
return Op0;
} else if (auto *C = getTargetConstantFromNode(Op1)) {
if (C->getType()->isVectorTy()) {
if (auto *SplatV = C->getSplatValue())
if (isSignMask(cast<ConstantFP>(SplatV)))
return Op0;
} else if (auto *FPConst = dyn_cast<ConstantFP>(C))
if (isSignMask(FPConst))
return Op0;
}
return SDValue();
}
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OrigVT = N->getValueType(0);
SDValue Arg = isFNEG(N);
assert(Arg.getNode() && "N is expected to be an FNEG node");
EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
SDLoc DL(N);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// If we're negating a FMUL node on a target with FMA, then we can avoid the
// use of a constant by performing (-0 - A*B) instead.
// FIXME: Check rounding control flags as well once it becomes available.
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
Arg.getOperand(1), Zero);
return DAG.getBitcast(OrigVT, NewNode);
}
// If we're negating an FMA node, then we can adjust the
// instruction to include the extra negation.
unsigned NewOpcode = 0;
if (Arg.hasOneUse()) {
switch (Arg.getOpcode()) {
case X86ISD::FMADD: NewOpcode = X86ISD::FNMSUB; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FMADD; break;
case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break;
// We can't handle a scalar intrinsic node here because it would only
// invert one element and not the whole vector. But we could try to handle
// a negation of the lower element only.
}
}
if (NewOpcode)
return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT,
Arg.getNode()->ops()));
return SDValue();
}
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
// If we have integer vector types available, use the integer opcodes.
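// For example (illustrative): a v4f32 X86ISD::FAND becomes
// (v4f32 (bitcast (and (v2i64 (bitcast A)), (v2i64 (bitcast B))))).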
if (VT.isVector() && Subtarget.hasSSE2()) {
SDLoc dl(N);
MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
unsigned IntOpcode;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected FP logic op");
case X86ISD::FOR: IntOpcode = ISD::OR; break;
case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
case X86ISD::FAND: IntOpcode = ISD::AND; break;
case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);
}
return SDValue();
}
static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
return Cmp;
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;
if (Subtarget.hasCMov())
if (SDValue RV = combineIntegerAbs(N, DAG))
return RV;
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
if (isFNEG(N))
return combineFneg(N, DAG, Subtarget);
return SDValue();
}
static bool isNullFPScalarOrVectorConst(SDValue V) {
return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
}
/// If a value is a scalar FP zero or a vector FP zero (potentially including
/// undefined elements), return a zero constant that may be used to fold away
/// that value. In the case of a vector, the returned constant will not contain
/// undefined elements even if the input parameter does. This makes it suitable
/// to be used as a replacement operand with operations (e.g., bitwise-and) where
/// an undef should not propagate.
static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!isNullFPScalarOrVectorConst(V))
return SDValue();
if (V.getValueType().isVector())
return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));
return V;
}
static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
(VT == MVT::f64 && Subtarget.hasSSE2())))
return SDValue();
auto isAllOnesConstantFP = [](SDValue V) {
auto *C = dyn_cast<ConstantFPSDNode>(V);
return C && C->getConstantFPValue()->isAllOnesValue();
};
// fand (fxor X, -1), Y --> fandn X, Y
if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1)))
return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1);
// fand X, (fxor Y, -1) --> fandn Y, X
if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1)))
return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// FAND(0.0, x) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
return V;
// FAND(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;
if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget))
return V;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// FANDN(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))
return N->getOperand(1);
// FANDN(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))
return N->getOperand(1);
// F[X]OR(x, 0.0) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(1)))
return N->getOperand(0);
if (isFNEG(N))
if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
// Only perform optimizations if UnsafeMath is used.
if (!DAG.getTarget().Options.UnsafeFPMath)
return SDValue();
// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
// into FMAXC and FMINC, which are commutative operations.
unsigned NewOp = 0;
switch (N->getOpcode()) {
default: llvm_unreachable("unknown opcode");
case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
}
return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
N->getOperand(0), N->getOperand(1));
}
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Subtarget.useSoftFloat())
return SDValue();
// TODO: Check for global or instruction-level "nnan". In that case, we
// should be able to lower to FMAX/FMIN alone.
// TODO: If an operand is already known to be a NaN or not a NaN, this
// should be an optional swap and FMAX/FMIN.
EVT VT = N->getValueType(0);
if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
(Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
return SDValue();
// This takes at least 3 instructions, so favor a library call when operating
// on a scalar and minimizing code size.
if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDLoc DL(N);
EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), VT);
// There are 4 possibilities involving NaN inputs, and these are the required
// outputs:
// Op1
// Num NaN
// ----------------
// Num | Max | Op0 |
// Op0 ----------------
// NaN | Op1 | NaN |
// ----------------
//
// The SSE FP max/min instructions were not designed for this case, but rather
// to implement:
// Min = Op1 < Op0 ? Op1 : Op0
// Max = Op1 > Op0 ? Op1 : Op0
//
// So they always return Op0 if either input is a NaN. However, we can still
// use those instructions for fmaxnum by selecting away a NaN input.
// If either operand is NaN, the 2nd source operand (Op0) is passed through.
auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;
SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType , Op0, Op0, ISD::SETUO);
// If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands
// are NaN, the NaN value of Op1 is the result.
return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
/// Do target-specific dag combines on X86ISD::ANDNP nodes.
static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
return N->getOperand(1);
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
EVT VT = N->getValueType(0);
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
return SDValue();
}
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
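// For example (illustrative): a 32-bit index only has its low
// Log2_32(32) == 5 bits demanded, so a mask such as (and Idx, 31) can be
// simplified away.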
SDValue Op1 = N->getOperand(1);
if (Op1.hasOneUse()) {
unsigned BitWidth = Op1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO))
DCI.CommitTargetLoweringOpt(TLO);
}
return SDValue();
}
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
SDLoc dl(N);
// SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE and
// AVX2 since there is no sign-extended shift-right operation on a vector
// with 64-bit elements.
// (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
//   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)) {
SDValue N00 = N0.getOperand(0);
// EXTLOAD has a better solution on AVX2: it may be replaced with an
// X86ISD::VSEXT node.
if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
if (!ISD::isNormalLoad(N00.getNode()))
return SDValue();
if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
N00, N1);
return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
}
}
return SDValue();
}
/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
/// zext(add_nuw(x, C)) --> add(zext(x), C_zext)
/// Promoting a sign/zero extension ahead of a no overflow 'add' exposes
/// opportunities to combine math ops, use an LEA, or use a complex addressing
/// mode. This can eliminate extend, add, and shift instructions.
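/// For example (illustrative):
///   (i64 sext (add nsw (i32 X), 5)) --> (i64 add (sext X), 5)
/// where the now sign-extended constant 5 can become the displacement of an
/// LEA.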
static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (Ext->getOpcode() != ISD::SIGN_EXTEND &&
Ext->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
// TODO: This should be valid for other integer types.
EVT VT = Ext->getValueType(0);
if (VT != MVT::i64)
return SDValue();
SDValue Add = Ext->getOperand(0);
if (Add.getOpcode() != ISD::ADD)
return SDValue();
bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;
bool NSW = Add->getFlags().hasNoSignedWrap();
bool NUW = Add->getFlags().hasNoUnsignedWrap();
// We need an 'add nsw' feeding into the 'sext' or an 'add nuw' feeding
// into the 'zext'.
if ((Sext && !NSW) || (!Sext && !NUW))
return SDValue();
// Having a constant operand to the 'add' ensures that we are not increasing
// the instruction count because the constant is extended for free below.
// A constant operand can also become the displacement field of an LEA.
auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
if (!AddOp1)
return SDValue();
// Don't make the 'add' bigger if there's no hope of combining it with some
// other 'add' or 'shl' instruction.
// TODO: It may be profitable to generate simpler LEA instructions in place
// of single 'add' instructions, but the cost model for selecting an LEA
// currently has a high threshold.
bool HasLEAPotential = false;
for (auto *User : Ext->uses()) {
if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
HasLEAPotential = true;
break;
}
}
if (!HasLEAPotential)
return SDValue();
// Everything looks good, so pull the '{s|z}ext' ahead of the 'add'.
int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue();
SDValue AddOp0 = Add.getOperand(0);
SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);
SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
// The wider add is guaranteed to not wrap because both operands are
// sign-extended.
SDNodeFlags Flags;
Flags.setNoSignedWrap(NSW);
Flags.setNoUnsignedWrap(NUW);
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
}
/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y))) ->
/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)))
/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
/// extends from AH (which we otherwise need to do contortions to access).
static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
auto OpcodeN = N->getOpcode();
auto OpcodeN0 = N0.getOpcode();
if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
(OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
return SDValue();
EVT VT = N->getValueType(0);
EVT InVT = N0.getValueType();
if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32)
return SDValue();
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
: X86ISD::UDIVREM8_ZEXT_HREG;
SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
N0.getOperand(1));
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
return R.getValue(1);
}
/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
/// ZERO_EXTEND_VECTOR_INREG. This requires splitting (or concatenating
/// with UNDEFs) the input into vectors of the same size as the target type,
/// which then extend the lowest elements.
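/// For example (illustrative): sign-extending v16i8 to v16i32 on an SSE4.1
/// (pre-AVX2) target splits the input into four v4i8 pieces; each piece is
/// widened with UNDEFs to 128 bits, sign-extended in-register to v4i32, and
/// the four results are concatenated back into v16i32.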
static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
return SDValue();
if (!DCI.isBeforeLegalizeOps())
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT InVT = N0.getValueType();
EVT InSVT = InVT.getScalarType();
// Input type must be a vector and we must be extending legal integer types.
if (!VT.isVector())
return SDValue();
if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
return SDValue();
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
// On AVX2+ targets, if the input/output types are both legal then we will be
// able to use SIGN_EXTEND/ZERO_EXTEND directly.
if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
SDLoc DL(N);
auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
Size / InVT.getScalarSizeInBits());
SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
DAG.getUNDEF(InVT));
Opnds[0] = N;
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
};
// If the target size is less than 128 bits, extend to a type that would
// extend to 128 bits, extend that and extract the original target vector.
if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) {
unsigned Scale = 128 / VT.getSizeInBits();
EVT ExVT =
EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
DAG.getIntPtrConstant(0, DL));
}
// If the target size is 128 bits (or 256 bits on an AVX2 target, or 512 bits
// on an AVX512 target), convert to ISD::*_EXTEND_VECTOR_INREG, which ensures
// lowering to X86ISD::V*EXT.
// Also use this if we don't have SSE41, to let the legalizer do its job.
if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
(VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.hasAVX512())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
return Opcode == ISD::SIGN_EXTEND
? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
: DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
}
auto SplitAndExtendInReg = [&](unsigned SplitSize) {
unsigned NumVecs = VT.getSizeInBits() / SplitSize;
unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
SrcVec = Opcode == ISD::SIGN_EXTEND
? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
: DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
Opnds.push_back(SrcVec);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
};
// On pre-AVX2 targets, split into 128-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
return SplitAndExtendInReg(128);
// On pre-AVX512 targets, split into 256-bit nodes of
// ISD::*_EXTEND_VECTOR_INREG.
if (!Subtarget.hasAVX512() && !(VT.getSizeInBits() % 256))
return SplitAndExtendInReg(256);
return SDValue();
}
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = N0.getValueType();
SDLoc DL(N);
if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;
if (!DCI.isBeforeLegalizeOps()) {
if (InVT == MVT::i1) {
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
return DAG.getSelect(DL, VT, N0, AllOnes, Zero);
}
return SDValue();
}
if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
// Inverting and sign-extending a boolean is the same as zero-extending and
// subtracting 1, because 0 becomes -1 and 1 becomes 0. The subtract is
// efficiently lowered with an LEA or a DEC. This is the same as:
// select Bool, 0, -1.
// sext (xor Bool, -1) --> sub (zext Bool), 1
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
}
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
if (Subtarget.hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
return SDValue();
}
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
EVT ScalarVT = VT.getScalarType();
if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
return SDValue();
SDValue A = N->getOperand(0);
SDValue B = N->getOperand(1);
SDValue C = N->getOperand(2);
auto invertIfNegative = [](SDValue &V) {
if (SDValue NegVal = isFNEG(V.getNode())) {
V = NegVal;
return true;
}
return false;
};
// Do not convert the passthru input of scalar intrinsics.
// FIXME: We could allow negations of the lower element only.
bool NegA = N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
bool NegB = invertIfNegative(B);
bool NegC = N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
// The multiplication is negated when exactly one of NegA and NegB is set.
bool NegMul = (NegA != NegB);
unsigned NewOpcode;
if (!NegMul)
NewOpcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
NewOpcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
if (N->getOpcode() == X86ISD::FMADD_RND) {
switch (NewOpcode) {
case X86ISD::FMADD: NewOpcode = X86ISD::FMADD_RND; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break;
}
} else if (N->getOpcode() == X86ISD::FMADDS1_RND) {
switch (NewOpcode) {
case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS1_RND; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1_RND; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1_RND; break;
}
} else if (N->getOpcode() == X86ISD::FMADDS3_RND) {
switch (NewOpcode) {
case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS3_RND; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3_RND; break;
case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break;
case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break;
}
} else {
assert((N->getOpcode() == X86ISD::FMADD || N->getOpcode() == ISD::FMA) &&
"Unexpected opcode!");
return DAG.getNode(NewOpcode, dl, VT, A, B, C);
}
return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
}
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
// ISD::SETCC is always legalized to i8.
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
if (!isOneConstant(N0.getOperand(1)))
return SDValue();
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, dl, VT));
}
}
if (N0.getOpcode() == ISD::TRUNCATE &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, dl, VT));
}
}
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
if (VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
return R;
return SDValue();
}
/// Try to map a 128-bit or larger integer comparison to vector instructions
/// before type legalization splits it up into chunks.
static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
// We're looking for an oversized integer equality comparison, but ignore a
// comparison with zero because that gets special treatment in EmitTest().
SDValue X = SetCC->getOperand(0);
SDValue Y = SetCC->getOperand(1);
EVT OpVT = X.getValueType();
unsigned OpSize = OpVT.getSizeInBits();
if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
return SDValue();
// TODO: Use PXOR + PTEST for SSE4.1 or later?
// TODO: Add support for AVX-512.
EVT VT = SetCC->getValueType(0);
SDLoc DL(SetCC);
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2())) {
EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
// setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
// setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
MVT::i32);
return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
}
return SDValue();
}
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
EVT OpVT = LHS.getValueType();
// 0-x == y --> x+y == 0
// 0-x != y --> x+y != 0
if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
LHS.hasOneUse()) {
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
// x == 0-y --> x+y == 0
// x != 0-y --> x+y != 0
if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
RHS.hasOneUse()) {
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
return V;
}
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
bool IsSEXT0 =
(LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
if (!IsSEXT0 || !IsVZero1) {
// Swap the operands and update the condition code.
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
}
if (IsSEXT0 && IsVZero1) {
assert(VT == LHS.getOperand(0).getValueType() &&
"Uexpected operand type");
if (CC == ISD::SETGT)
return DAG.getConstant(0, DL, VT);
if (CC == ISD::SETLE)
return DAG.getConstant(1, DL, VT);
if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
"Unexpected condition code!");
return LHS.getOperand(0);
}
}
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
LHS.getValueType() == MVT::v4f32)
return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
return SDValue();
}
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// Gather and Scatter instructions use k-registers for masks. The type of
// the masks is v*i1. So the mask will be truncated anyway.
// The SIGN_EXTEND_INREG may be dropped.
SDValue Mask = N->getOperand(2);
if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
NewOps[2] = Mask.getOperand(0);
DAG.UpdateNodeOperands(N, NewOps);
}
return SDValue();
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
return getSETCC(CC, Flags, DL, DAG);
return SDValue();
}
/// Optimize branch condition evaluation.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
// Try to simplify the EFLAGS and condition code operands.
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
}
return SDValue();
}
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize away the operation when it's fed from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getBitcast(VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT))
return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Since UINT_TO_FP is legal (it's marked custom), the DAG combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known to be zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(Op0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
return SDValue();
}
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// First try to optimize away the conversion entirely when it's
// conditionally from a constant. Vectors only.
if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
return Res;
// Now move on to more general possibilities.
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
if (InVT.isVector() &&
(InSVT == MVT::i8 || InSVT == MVT::i16 ||
(InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
// Without AVX512DQ we only support i64 to float scalar conversion. For both
// vectors and scalars, see if we know that the upper bits are all the sign
// bit, in which case we can truncate the input to i32 and convert from that.
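// For example (illustrative): (sint_to_fp (i64 sext (i32 X))) has at least
// 33 sign bits, so the source can be shrunk to (i32 X) without changing the
// value.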
if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
unsigned BitWidth = InVT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
if (NumSignBits >= (BitWidth - 31)) {
EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
if (InVT.isVector())
TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
InVT.getVectorNumElements());
SDLoc dl(N);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
}
}
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
// This transformation is not supported if the result type is f16 or f128.
if (VT == MVT::f16 || VT == MVT::f128)
return SDValue();
if (!Ld->isVolatile() && !VT.isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!Subtarget.is64Bit() && LdVT == MVT::i64) {
SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD(
SDValue(N, 0), LdVT, Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
}
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADD LHS, RHS
static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
// When legalizing carry, we create carries via 'add X, -1'.
// If that comes from an actual carry, via setcc, we use the
// carry directly.
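// For example (illustrative): in (X86ISD::ADD (zext (X86ISD::SETCC COND_B,
// F)), -1) the add merely re-creates the carry held in F, so users of this
// node's flag result can be rewired to F directly.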
if (isAllOnesConstant(N->getOperand(1)) && N->hasAnyUseOfValue(1)) {
SDValue Carry = N->getOperand(0);
while (Carry.getOpcode() == ISD::TRUNCATE ||
Carry.getOpcode() == ISD::ZERO_EXTEND ||
Carry.getOpcode() == ISD::SIGN_EXTEND ||
Carry.getOpcode() == ISD::ANY_EXTEND ||
(Carry.getOpcode() == ISD::AND &&
isOneConstant(Carry.getOperand(1))))
Carry = Carry.getOperand(0);
if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
if (Carry.getConstantOperandVal(0) == X86::COND_B)
return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1));
}
}
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
if (X86::isZeroNode(N->getOperand(0)) &&
X86::isZeroNode(N->getOperand(1)) &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL,
MVT::i8),
N->getOperand(2)),
DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
return SDValue();
}
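// For example, (adc 0, 0, EFLAGS) whose flag result is unused becomes
// (and (setcc_carry COND_B, EFLAGS), 1), i.e. a plain 0/1 materialization
// of the incoming carry bit.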
/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
/// which is more useful than 0/1 in some cases.
static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
SDLoc DL(N);
// "Condition code B" is also known as "the carry flag" (CF).
SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
MVT VT = N->getSimpleValueType(0);
if (VT == MVT::i8)
return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
assert(VT == MVT::i1 && "Unexpected type for SETCC node");
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
}
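// e.g. with CF = 1, the SETCC_CARRY above lowers to an sbb producing 0xFF in
// an i8 register; the AND with 1 recovers the 0/1 value, while i1 results
// only need a truncate.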
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
bool IsSub = N->getOpcode() == ISD::SUB;
SDValue X = N->getOperand(0);
SDValue Y = N->getOperand(1);
// If this is an add, canonicalize a zext operand to the RHS.
// TODO: Incomplete? What if both sides are zexts?
if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
Y.getOpcode() != ISD::ZERO_EXTEND)
std::swap(X, Y);
// Look through a one-use zext.
bool PeekedThroughZext = false;
if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
Y = Y.getOperand(0);
PeekedThroughZext = true;
}
// If this is an add, canonicalize a setcc operand to the RHS.
// TODO: Incomplete? What if both sides are setcc?
// TODO: Should we allow peeking through a zext of the other operand?
if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
Y.getOpcode() != X86ISD::SETCC)
std::swap(X, Y);
if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
auto *ConstantX = dyn_cast<ConstantSDNode>(X);
if (ConstantX) {
if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
(IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
// This is a complicated way to get -1 or 0 from the carry flag:
// -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
// 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
Y.getOperand(1));
}
if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
(IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
SDValue EFLAGS = Y->getOperand(1);
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
// Swap the operands of a SUB, and we have the same pattern as above.
// -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
// 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
SDValue NewSub = DAG.getNode(
X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
NewEFLAGS);
}
}
}
if (CC == X86::COND_B) {
// X + SETB Z --> X + (mask SBB Z, Z)
// X - SETB Z --> X - (mask SBB Z, Z)
// TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
if (SBB.getValueSizeInBits() != VT.getSizeInBits())
SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
}
if (CC == X86::COND_A) {
SDValue EFLAGS = Y->getOperand(1);
// Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
if (SBB.getValueSizeInBits() != VT.getSizeInBits())
SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
}
}
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
SDValue Cmp = Y.getOperand(1);
if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
!X86::isZeroNode(Cmp.getOperand(1)) ||
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
SDValue Z = Cmp.getOperand(0);
EVT ZVT = Z.getValueType();
// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
if (ConstantX) {
// 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
// fake operands:
// 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
// -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
(!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
SDValue Zero = DAG.getConstant(0, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8),
SDValue(Neg.getNode(), 1));
}
// cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
// with fake operands:
// 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
// -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
(!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
SDValue One = DAG.getConstant(1, DL, ZVT);
SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1);
}
}
// (cmp Z, 1) sets the carry flag if Z is 0.
SDValue One = DAG.getConstant(1, DL, ZVT);
SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One);
// Add the flags type for ADC/SBB nodes.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
// X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
// X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
DAG.getConstant(-1ULL, DL, VT), Cmp1);
// X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
// X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
DAG.getConstant(0, DL, VT), Cmp1);
}
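// Sanity check of the final ADC form above: (cmp Z, 1) sets CF exactly when
// Z == 0, so (adc X, -1, CF) computes X - 1 + CF, which is X - 1 for Z != 0
// and X for Z == 0 -- matching X - (Z != 0).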
static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue MulOp = N->getOperand(0);
SDValue Phi = N->getOperand(1);
if (MulOp.getOpcode() != ISD::MUL)
std::swap(MulOp, Phi);
if (MulOp.getOpcode() != ISD::MUL)
return SDValue();
ShrinkMode Mode;
if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
return SDValue();
EVT VT = N->getValueType(0);
unsigned RegSize = 128;
if (Subtarget.hasBWI())
RegSize = 512;
else if (Subtarget.hasAVX2())
RegSize = 256;
unsigned VectorSize = VT.getVectorNumElements() * 16;
// If the vector size is less than 128, or greater than the supported RegSize,
// do not use PMADD.
if (VectorSize < 128 || VectorSize > RegSize)
return SDValue();
SDLoc DL(N);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements());
EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements() / 2);
// Shrink the operands of mul.
SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
// Madd vector size is half of the original vector size
SDValue Madd = DAG.getNode(X86ISD::VPMADDWD, DL, MAddVT, N0, N1);
// Fill the rest of the output with 0
SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
}
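// For illustration: for a v8i32 reduction add whose mul operands are known to
// fit in 16 signed bits, the operands are truncated to v8i16 and a single
// VPMADDWD yields a v4i32 partial sum, which is padded with zeroes back to
// v8i32 and added into the accumulator phi.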
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// TODO: There's nothing special about i32, any integer type above i16 should
// work just as well.
if (!VT.isVector() || !VT.isSimple() ||
!(VT.getVectorElementType() == MVT::i32))
return SDValue();
unsigned RegSize = 128;
if (Subtarget.hasBWI())
RegSize = 512;
else if (Subtarget.hasAVX2())
RegSize = 256;
// We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
if (VT.getSizeInBits() / 4 > RegSize)
return SDValue();
// We know N is a reduction add, which means one of its operands is a phi.
// To match SAD, we need the other operand to be a vector select.
SDValue SelectOp, Phi;
if (Op0.getOpcode() == ISD::VSELECT) {
SelectOp = Op0;
Phi = Op1;
} else if (Op1.getOpcode() == ISD::VSELECT) {
SelectOp = Op1;
Phi = Op0;
} else
return SDValue();
// Check whether we have an abs-diff pattern feeding into the select.
if (!detectZextAbsDiff(SelectOp, Op0, Op1))
return SDValue();
// SAD pattern detected. Now build a SAD instruction and an addition for
// reduction. Note that the result of SAD has fewer elements than its input,
// so we can only update part of the elements in the reduction vector.
SDValue Sad = createPSADBW(DAG, Op0, Op1, DL);
// The output of PSADBW is a vector of i64.
// We need to turn the vector of i64 into a vector of i32.
// If the reduction vector is at least as wide as the psadbw result, just
// bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
// anyway.
MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
if (VT.getSizeInBits() >= ResVT.getSizeInBits())
Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
else
Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
if (VT.getSizeInBits() > ResVT.getSizeInBits()) {
// Update part of the elements of the reduction vector. This is done by first
// extracting a sub-vector from it, updating this sub-vector, and inserting
// it back.
SDValue SubPhi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Phi,
DAG.getIntPtrConstant(0, DL));
SDValue Res = DAG.getNode(ISD::ADD, DL, ResVT, Sad, SubPhi);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Phi, Res,
DAG.getIntPtrConstant(0, DL));
} else
return DAG.getNode(ISD::ADD, DL, VT, Sad, Phi);
}
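// For illustration: a v16i32 reduction of zext(|a - b|) over v16i8 inputs
// becomes one PSADBW producing v2i64, which is bitcast to v4i32 and added
// into the low four lanes of the accumulator via extract/insert_subvector.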
/// Convert vector increment or decrement to sub/add with an all-ones constant:
/// add X, <1, 1...> --> sub X, <-1, -1...>
/// sub X, <1, 1...> --> add X, <-1, -1...>
/// The all-ones vector constant can be materialized using a pcmpeq instruction
/// that is commonly recognized as an idiom (has no register dependency), so
/// that's better/smaller than loading a splat 1 constant.
static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Unexpected opcode for increment/decrement transform");
// Pseudo-legality check: getOnesVector() expects one of these types, so bail
// out and wait for legalization if we have an unsupported vector length.
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
SDNode *N1 = N->getOperand(1).getNode();
APInt SplatVal;
- if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue())
+ if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) ||
+ !SplatVal.isOneValue())
return SDValue();
SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
}
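// For example, (add v4i32 X, <1,1,1,1>) becomes (sub X, <-1,-1,-1,-1>); the
// all-ones vector is materialized with pcmpeq, avoiding a constant-pool load
// of the splat-1 vector.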
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasVectorReduction()) {
if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
return Sad;
if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
return MAdd;
}
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
if (SDValue V = combineIncDecVector(N, DAG))
return V;
return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// X86 can't encode an immediate LHS of a sub. See if we can push the
// negation into a preceding instruction.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
// If the RHS of the sub is a XOR with one use and a constant, invert the
// immediate. Then add one to the LHS of the sub so we can turn
// X-Y -> X+~Y+1, saving one register.
if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
isa<ConstantSDNode>(Op1.getOperand(1))) {
APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
Op1.getOperand(0),
DAG.getConstant(~XorC, SDLoc(Op1), VT));
return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor,
DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT));
}
}
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
if (SDValue V = combineIncDecVector(N, DAG))
return V;
return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
SDLoc DL(N);
unsigned Opcode = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = SVT.getSizeInBits();
SDValue Op = N->getOperand(0);
MVT OpVT = Op.getSimpleValueType();
MVT OpEltVT = OpVT.getVectorElementType();
unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
unsigned InputBits = OpEltSizeInBits * NumElts;
// Perform any constant folding.
// FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
APInt UndefElts;
SmallVector<APInt, 64> EltBits;
if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
APInt Undefs(NumElts, 0);
SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
bool IsZEXT =
(Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
for (unsigned i = 0; i != NumElts; ++i) {
if (UndefElts[i]) {
Undefs.setBit(i);
continue;
}
Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
: EltBits[i].sextOrTrunc(EltSizeInBits);
}
return getConstVector(Vals, Undefs, VT, DAG, DL);
}
// (vzext (bitcast (vzext (x)) -> (vzext x)
// TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
SDValue V = peekThroughBitcasts(Op);
if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
MVT InnerVT = V.getSimpleValueType();
MVT InnerEltVT = InnerVT.getVectorElementType();
// If the element sizes match exactly, we can just do one larger vzext. This
// is always an exact type match as vzext operates on integer types.
if (OpEltVT == InnerEltVT) {
assert(OpVT == InnerVT && "Types must match for vzext!");
return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
}
// The only other way we can combine them is if only a single element of the
// inner vzext is used in the input to the outer vzext.
if (InnerEltVT.getSizeInBits() < InputBits)
return SDValue();
// In this case, the inner vzext is completely dead because we're going to
// only look at bits inside of the low element. Just do the outer vzext on
// a bitcast of the input to the inner.
return DAG.getNode(X86ISD::VZEXT, DL, VT, DAG.getBitcast(OpVT, V));
}
// Check if we can bypass extracting and re-inserting an element of an input
// vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
// TODO: Add X86ISD::VSEXT support
if (Opcode == X86ISD::VZEXT &&
V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
SDValue OrigV = ExtractedV.getOperand(0);
if (isNullConstant(ExtractedV.getOperand(1))) {
MVT OrigVT = OrigV.getSimpleValueType();
// Extract a subvector if necessary...
if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
OrigVT.getVectorNumElements() / Ratio);
OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
DAG.getIntPtrConstant(0, DL));
}
Op = DAG.getBitcast(OpVT, OrigV);
return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
}
}
return SDValue();
}
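// For example, the constant-folding path above turns a zero-extension of the
// constant vector <1, -1, undef, 3> (i8 elements) into <1, 255, undef, 3>
// (i32 elements), while the sign-extending opcodes would yield
// <1, -1, undef, 3>.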
/// Canonicalize (LSUB p, 1) -> (LADD p, -1).
static SDValue combineLockSub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Chain = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
MVT VT = RHS.getSimpleValueType();
SDLoc DL(N);
auto *C = dyn_cast<ConstantSDNode>(RHS);
if (!C || C->getZExtValue() != 1)
return SDValue();
RHS = DAG.getConstant(-1, DL, VT);
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
return DAG.getMemIntrinsicNode(X86ISD::LADD, DL,
DAG.getVTList(MVT::i32, MVT::Other),
{Chain, LHS, RHS}, VT, MMO);
}
// TEST (AND a, b), (AND a, b) -> TEST a, b
static SDValue combineTestM(SDNode *N, SelectionDAG &DAG) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0 != Op1 || Op1->getOpcode() != ISD::AND)
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
return DAG.getNode(X86ISD::TESTM, DL, VT,
Op0->getOperand(0), Op0->getOperand(1));
}
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = N->getSimpleValueType(0);
SDLoc DL(N);
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
return getOnesVector(VT, DAG, DL);
if (N->getOpcode() == X86ISD::PCMPGT)
return getZeroVector(VT, Subtarget, DAG, DL);
}
return SDValue();
}
static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc dl(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT OpVT = N->getSimpleValueType(0);
MVT SubVecVT = SubVec.getSimpleValueType();
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subvector operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
SubVec.getOperand(0).getSimpleValueType() == OpVT &&
(IdxVal != 0 || !Vec.isUndef())) {
int ExtIdxVal = cast<ConstantSDNode>(SubVec.getOperand(1))->getZExtValue();
if (ExtIdxVal != 0) {
int VecNumElts = OpVT.getVectorNumElements();
int SubVecNumElts = SubVecVT.getVectorNumElements();
SmallVector<int, 64> Mask(VecNumElts);
// First create an identity shuffle mask.
for (int i = 0; i != VecNumElts; ++i)
Mask[i] = i;
// Now insert the extracted portion.
for (int i = 0; i != SubVecNumElts; ++i)
Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
}
}
// Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
// load:
// (insert_subvector (insert_subvector undef, (load16 addr), 0),
// (load16 addr + 16), Elts/2)
// --> load32 addr
// or:
// (insert_subvector (insert_subvector undef, (load32 addr), 0),
// (load32 addr + 32), Elts/2)
// --> load64 addr
// or a 16-byte or 32-byte broadcast:
// (insert_subvector (insert_subvector undef, (load16 addr), 0),
// (load16 addr), Elts/2)
// --> X86SubVBroadcast(load16 addr)
// or:
// (insert_subvector (insert_subvector undef, (load32 addr), 0),
// (load32 addr), Elts/2)
// --> X86SubVBroadcast(load32 addr)
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
if (Idx2 && Idx2->getZExtValue() == 0) {
SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
bool Fast;
unsigned Alignment = FirstLd->getAlignment();
unsigned AS = FirstLd->getAddressSpace();
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
OpVT, AS, Alignment, &Fast) && Fast) {
SDValue Ops[] = {SubVec2, SubVec};
if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG,
Subtarget, false))
return Ld;
}
}
// If lower/upper loads are the same and the only users of the load, then
// lower to a VBROADCASTF128/VBROADCASTI128/etc.
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode())) {
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
}
}
// If this is subv_broadcast insert into both halves, use a larger
// subv_broadcast.
if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
SubVec.getOperand(0));
}
}
}
return SDValue();
}
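// For example, inserting (extract_subvector X, 4) at lane 4 of a v8f32 Vec
// becomes a single vector shuffle of Vec and X, and inserting one 16-byte
// load into both halves of a 32-byte vector becomes a SUBV_BROADCAST
// (VBROADCASTF128/VBROADCASTI128).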
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case X86ISD::PEXTRW:
case X86ISD::PEXTRB:
return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget);
case ISD::INSERT_SUBVECTOR:
return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
case X86ISD::ADD: return combineX86ADD(N, DAG, DCI);
case X86ISD::ADC: return combineADC(N, DAG, DCI);
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return combineShift(N, DAG, DCI, Subtarget);
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return combineFOr(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
return combineVectorShiftImm(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VSEXT:
case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::PINSRB:
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::EXTRQI:
case X86ISD::INSERTQI:
case X86ISD::PALIGNR:
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
case X86ISD::BLENDI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::VPPERM:
case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VPERMIV3:
case X86ISD::VPERMIL2:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
case X86ISD::VZEXT_MOVL:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI, Subtarget);
case X86ISD::FMADD:
case X86ISD::FMADD_RND:
case X86ISD::FMADDS1_RND:
case X86ISD::FMADDS3_RND:
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG);
case X86ISD::LSUB: return combineLockSub(N, DAG, Subtarget);
case X86ISD::TESTM: return combineTestM(N, DAG);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
}
return SDValue();
}
/// Return true if the target has native support for the specified value type
/// and it is 'desirable' to use the type for the given node type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer and
/// some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
if (VT != MVT::i16)
return true;
switch (Opc) {
default:
return true;
case ISD::LOAD:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return false;
}
}
/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
/// we don't adjust the stack we clobber the first frame index.
/// See X86InstrInfo::copyPhysReg.
static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
return any_of(MRI.reg_instructions(X86::EFLAGS),
[](const MachineInstr &RI) { return RI.isCopy(); });
}
void X86TargetLowering::finalizeLowering(MachineFunction &MF) const {
if (hasCopyImplyingStackAdjustment(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setHasCopyImplyingStackAdjustment(true);
}
TargetLoweringBase::finalizeLowering(MF);
}
/// This method queries the target about whether it is beneficial for the DAG
/// combiner to promote the specified node. If true, it should return the
/// desired promotion type by reference.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
if (VT != MVT::i16)
return false;
bool Promote = false;
bool Commute = false;
switch (Op.getOpcode()) {
default: break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
Promote = true;
break;
case ISD::SHL:
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
return false;
Promote = true;
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
Commute = true;
LLVM_FALLTHROUGH;
case ISD::SUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
if (!Commute && MayFoldLoad(N1))
return false;
// Avoid disabling potential load folding opportunities.
if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op)))
return false;
if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op)))
return false;
Promote = true;
}
}
PVT = MVT::i32;
return Promote;
}
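// For example, an i16 'and' of two values that are not foldable loads is
// promoted to i32: the 32-bit form avoids the 0x66 operand-size prefix, and
// 'and' commutes, so operand order poses no problem. An i16 shift whose input
// load feeds a store is left alone so the load/store folding is not lost.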
//===----------------------------------------------------------------------===//
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Helper to match a string as a sequence of whitespace-separated pieces.
static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
for (StringRef Piece : Pieces) {
if (!S.startswith(Piece)) // Check if the piece matches.
return false;
S = S.substr(Piece.size());
StringRef::size_type Pos = S.find_first_not_of(" \t");
if (Pos == 0) // We matched a prefix.
return false;
S = S.substr(Pos);
}
return S.empty();
}
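// For example, matchAsm("  bswap   $0", {"bswap", "$0"}) succeeds, while
// matchAsm("bswapper $0", {"bswap", "$0"}) fails: "per" follows the matched
// piece with no intervening whitespace, so it is rejected as a mere prefix.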
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
return true;
}
}
return false;
}
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
const std::string &AsmStr = IA->getAsmString();
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
// FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
(matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
if (CI->getType()->isIntegerTy(64)) {
InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
if (Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
}
return false;
}
/// Given a constraint letter, return the type of constraint for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'R':
case 'q':
case 'Q':
case 'f':
case 't':
case 'u':
case 'y':
case 'x':
case 'v':
case 'Y':
case 'l':
return C_RegisterClass;
case 'k': // AVX512 masking registers.
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
return C_Register;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'G':
case 'C':
case 'e':
case 'Z':
return C_Other;
default:
break;
}
}
else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default:
break;
case 'Y':
switch (Constraint[1]) {
default:
break;
case 'k':
return C_Register;
}
}
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
LLVM_FALLTHROUGH;
case 'R':
case 'q':
case 'Q':
case 'a':
case 'b':
case 'c':
case 'd':
case 'S':
case 'D':
case 'A':
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_SpecificReg;
break;
case 'f':
case 't':
case 'u':
if (type->isFloatingPointTy())
weight = CW_SpecificReg;
break;
case 'y':
if (type->isX86_MMXTy() && Subtarget.hasMMX())
weight = CW_SpecificReg;
break;
case 'Y':
// Other "Y<x>" (e.g. "Yk") constraints should be implemented below.
if (constraint[1] == 'k') {
// Support for 'Yk' (similarly to the 'k' variant below).
weight = CW_SpecificReg;
break;
}
// Else fall through (handle "Y" constraint).
LLVM_FALLTHROUGH;
case 'v':
if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
weight = CW_Register;
LLVM_FALLTHROUGH;
case 'x':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
weight = CW_Register;
break;
case 'k':
// Enable conditional vector operations using %k<#> registers.
weight = CW_SpecificReg;
break;
case 'I':
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
weight = CW_Constant;
}
break;
case 'J':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 63)
weight = CW_Constant;
}
break;
case 'K':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
weight = CW_Constant;
}
break;
case 'L':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
weight = CW_Constant;
}
break;
case 'M':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 3)
weight = CW_Constant;
}
break;
case 'N':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xff)
weight = CW_Constant;
}
break;
case 'G':
case 'C':
if (isa<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
case 'e':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if ((C->getSExtValue() >= -0x80000000LL) &&
(C->getSExtValue() <= 0x7fffffffLL))
weight = CW_Constant;
}
break;
case 'Z':
if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
if (C->getZExtValue() <= 0xffffffff)
weight = CW_Constant;
}
break;
}
return weight;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *X86TargetLowering::
LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
if (Subtarget.hasSSE2())
return "Y";
if (Subtarget.hasSSE1())
return "x";
}
return TargetLowering::LowerXConstraint(ConstraintVT);
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'J':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'L':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'M':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 3) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'O':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 127) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
return;
case 'e': {
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
}
return;
}
case 'Z': {
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
}
// FIXME gcc accepts some relocatable values here too, but only in certain
// memory models; it's complicated.
return;
}
case 'i': {
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
// be used as immediates.
if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = nullptr;
int64_t Offset = 0;
// Match either (GA), (GA+C), (GA+C1+C2), etc.
while (true) {
if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
Offset += GA->getOffset();
break;
} else if (Op.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
} else if (Op.getOpcode() == ISD::SUB) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += -C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
}
// Otherwise, this isn't something we can handle, reject it.
return;
}
const GlobalValue *GV = GA->getGlobal();
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
if (isGlobalStubReference(Subtarget.classifyGlobalReference(GV)))
return;
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
GA->getValueType(0), Offset);
break;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
/// Check if \p RC is a general purpose register class.
/// I.e., GR* or one of their variant.
static bool isGRClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::GR8RegClass) ||
RC.hasSuperClassEq(&X86::GR16RegClass) ||
RC.hasSuperClassEq(&X86::GR32RegClass) ||
RC.hasSuperClassEq(&X86::GR64RegClass) ||
RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass);
}
/// Check if \p RC is a vector register class.
/// I.e., FR* / VR* or one of their variant.
static bool isFRClass(const TargetRegisterClass &RC) {
return RC.hasSuperClassEq(&X86::FR32XRegClass) ||
RC.hasSuperClassEq(&X86::FR64XRegClass) ||
RC.hasSuperClassEq(&X86::VR128XRegClass) ||
RC.hasSuperClassEq(&X86::VR256XRegClass) ||
RC.hasSuperClassEq(&X86::VR512RegClass);
}
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
if (Constraint.size() == 1) {
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
// TODO: Slight differences here in allocation order and leaving
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
case 'k':
if (Subtarget.hasAVX512()) {
// Only supported in AVX512 or later.
switch (VT.SimpleTy) {
default: break;
case MVT::i32:
return std::make_pair(0U, &X86::VK32RegClass);
case MVT::i16:
return std::make_pair(0U, &X86::VK16RegClass);
case MVT::i8:
return std::make_pair(0U, &X86::VK8RegClass);
case MVT::i1:
return std::make_pair(0U, &X86::VK1RegClass);
case MVT::i64:
return std::make_pair(0U, &X86::VK64RegClass);
}
}
break;
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i64 || VT == MVT::f64)
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
LLVM_FALLTHROUGH;
// 32-bit fallthrough
case 'Q': // Q_REGS
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32_ABCDRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_ABCDRegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i64)
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32RegClass);
return std::make_pair(0U, &X86::GR64RegClass);
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
if (VT == MVT::i32 || !Subtarget.is64Bit())
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP64RegClass);
return std::make_pair(0U, &X86::RFP80RegClass);
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget.hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget.hasSSE2()) break;
LLVM_FALLTHROUGH;
case 'v':
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
if (!Subtarget.hasSSE1()) break;
bool VConstraint = (Constraint[0] == 'v');
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR32XRegClass);
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR64XRegClass);
return std::make_pair(0U, &X86::FR64RegClass);
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR256XRegClass);
return std::make_pair(0U, &X86::VR256RegClass);
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
return std::make_pair(0U, &X86::VR512RegClass);
}
break;
}
} else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
switch (Constraint[1]) {
default:
break;
case 'k':
// This register class doesn't allocate k0 for masked vector operations.
if (Subtarget.hasAVX512()) { // Only supported in AVX512.
switch (VT.SimpleTy) {
default: break;
case MVT::i32:
return std::make_pair(0U, &X86::VK32WMRegClass);
case MVT::i16:
return std::make_pair(0U, &X86::VK16WMRegClass);
case MVT::i8:
return std::make_pair(0U, &X86::VK8WMRegClass);
case MVT::i1:
return std::make_pair(0U, &X86::VK1WMRegClass);
case MVT::i64:
return std::make_pair(0U, &X86::VK64WMRegClass);
}
}
break;
}
}
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
// Map st(0) .. st(7) to FP0 .. FP7 in the RFP80 register class.
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' &&
tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
Constraint[5] == ')' &&
Constraint[6] == '}') {
Res.first = X86::FP0+Constraint[4]-'0';
Res.second = &X86::RFP80RegClass;
return Res;
}
// GCC allows "st(0)" to be called just plain "st".
if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::FP0;
Res.second = &X86::RFP80RegClass;
return Res;
}
// flags -> EFLAGS
if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = &X86::CCRRegClass;
return Res;
}
// 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
if (Subtarget.is64Bit()) {
Res.first = X86::RAX;
Res.second = &X86::GR64_ADRegClass;
} else {
assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
"Expecting 64, 32 or 16 bit subtarget");
Res.first = X86::EAX;
Res.second = &X86::GR32_ADRegClass;
}
return Res;
}
return Res;
}
// Otherwise, check to see if this is a register class of the wrong value
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
// MVT::Other is used to specify clobber names.
if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
return Res; // Correct type already, nothing to do.
// Get a matching integer of the correct size. i.e. "ax" with MVT::i32 should
// return "eax". This should even work for things like getting 64-bit integer
// registers when given an f64 type.
const TargetRegisterClass *Class = Res.second;
// The generic code will match the first register class that contains the
// given register. Thus, based on the ordering of the tablegened file,
// the "plain" GR classes might not come first.
// Therefore, use a helper method.
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
if (Size == 1) Size = 8;
unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
Res.first = DestReg;
Res.second = Size == 8 ? &X86::GR8RegClass
: Size == 16 ? &X86::GR16RegClass
: Size == 32 ? &X86::GR32RegClass
: &X86::GR64RegClass;
assert(Res.second->contains(Res.first) && "Register in register class");
} else {
// No register found/type mismatch.
Res.first = 0;
Res.second = nullptr;
}
} else if (isFRClass(*Class)) {
// Handle references to XMM physical registers that got mapped into the
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT))
Res.second = &X86::VR128RegClass;
else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT))
Res.second = &X86::VR256RegClass;
else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
Res.second = &X86::VR512RegClass;
else {
// Type mismatch and not a clobber: return an error.
Res.first = 0;
Res.second = nullptr;
}
}
return Res;
}
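// For example, the constraint "{ax}" with an i32 operand is remapped by the
// code above from AX/GR16 to EAX/GR32, instead of leaving a mismatched
// register/type pair.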
int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
// will take 2 allocations in the out of order engine instead of 1
// for plain addressing mode, i.e. inst (reg1).
// E.g.,
// vaddps (%rsi,%rdx), %ymm0, %ymm1
// Requires two allocations (one for the load, one for the computation)
// whereas:
// vaddps (%rsi), %ymm0, %ymm1
// Requires just 1 allocation, i.e., freeing allocations for other operations
// and having less micro operations to execute.
//
// For some X86 architectures, this is even worse because for instance for
// stores, the complex addressing mode forces the instruction to use the
// "load" ports instead of the dedicated "store" port.
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;
return -1;
}
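// For example, a legal mode like (%rsi,%rdx,2) returns cost 1 (the scaled
// index needs an extra allocation), plain (%rsi) returns 0, and an illegal
// addressing mode returns -1.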
bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
// than the alternative sequence.
// The exception to this is vector division. Since x86 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize =
Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
if (!Subtarget.is64Bit())
return;
// Update IsSplitCSR in X86MachineFunctionInfo.
X86MachineFunctionInfo *AFI =
Entry->getParent()->getInfo<X86MachineFunctionInfo>();
AFI->setIsSplitCSR(true);
}
void X86TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (X86::GR64RegClass.contains(*I))
RC = &X86::GR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
unsigned NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction()->hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
// If the function specifically requests stack probes, emit them.
if (MF.getFunction()->hasFnAttribute("probe-stack"))
return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString();
// Generally, if we aren't on Windows, the platform ABI does not include
// support for stack probes, so don't emit them.
if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO())
return "";
// We need a stack probe to conform to the Windows ABI. Choose the right
// symbol.
if (Subtarget.is64Bit())
return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
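// For example, a 64-bit MinGW target probes with ___chkstk_ms and a 64-bit
// MSVC target with __chkstk, while 32-bit targets use _alloca (CygMing) or
// _chkstk (MSVC); non-Windows and Mach-O targets emit no probe unless the
// function carries a "probe-stack" attribute.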
Index: head/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86InstrAVX512.td (revision 322854)
+++ head/contrib/llvm/lib/Target/X86/X86InstrAVX512.td (revision 322855)
@@ -1,10244 +1,10244 @@
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
string suffix = ""> {
RegisterClass RC = rc;
ValueType EltVT = eltvt;
int NumElts = numelts;
// Corresponding mask register class.
RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
// Corresponding write-mask register class.
RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask VT.
ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
// Suffix used in the instruction mnemonic.
string Suffix = suffix;
// VTName is a string name for the vector VT. For vector types it will be
// v # NumElts # EltVT, so for a vector of 8 i32 elements it will be v8i32.
// It is slightly more involved for scalar types, where NumElts = 1:
// in that case we build v4f32 or v2f64.
string VTName = "v" # !if (!eq (NumElts, 1),
!if (!eq (EltVT.Size, 32), 4,
!if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The vector VT.
ValueType VT = !cast<ValueType>(VTName);
string EltTypeName = !cast<string>(EltVT);
// Size of the element type in bits, e.g. 32 for v16i32.
string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
int EltSize = EltVT.Size;
// "i" for integer types and "f" for floating-point types
string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
// Size of RC in bits, e.g. 512 for VR512.
int Size = VT.Size;
// The corresponding memory operand, e.g. i512mem for VR512.
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
// FP scalar memory operand for intrinsics - ssmem/sdmem.
Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
!if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
// Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
// due to load promotion during legalization
PatFrag LdFrag = !cast<PatFrag>("load" #
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
!if (!eq (Size, 512), "v8i64",
VTName))), VTName));
PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
!if (!eq (Size, 512), "v8i64",
VTName))), VTName));
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
!cast<ComplexPattern>("sse_load_f32"),
!if (!eq (EltTypeName, "f64"),
!cast<ComplexPattern>("sse_load_f64"),
?));
// The corresponding float type, e.g. v16f32 for v16i32
// Note: For EltSize < 32, FloatVT is illegal and TableGen
// fails to compile, so we choose FloatVT = VT
ValueType FloatVT = !cast<ValueType>(
!if (!eq (!srl(EltSize,5),0),
VTName,
!if (!eq(TypeVariantName, "i"),
"v" # NumElts # "f" # EltSize,
VTName)));
ValueType IntVT = !cast<ValueType>(
!if (!eq (!srl(EltSize,5),0),
VTName,
!if (!eq(TypeVariantName, "f"),
"v" # NumElts # "i" # EltSize,
VTName)));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
// 8-bit compressed displacement tuple/subvector format. This is only
// defined for NumElts <= 8.
CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
!cast<CD8VForm>("CD8VT" # NumElts), ?);
SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
!if (!eq (Size, 256), sub_ymm, ?));
Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
SSEPackedInt));
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
// A vector type of the same width with element type i64. This is used to
// create patterns for logic ops.
ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
string ZSuffix = !if (!eq (Size, 128), "Z128",
!if (!eq (Size, 256), "Z256", "Z"));
}
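// Worked example (expository comment, not in the upstream file): for
// v16i32_info = X86VectorVTInfo<16, i32, VR512, "d"> (defined below) the
// derived fields come out as KRC = VK16, KRCWM = VK16WM, KVT = v16i1,
// VTName = "v16i32", EltSize = 32, TypeVariantName = "i", Size = 512,
// MemOp = i512mem, LdFrag = loadv8i64 (integer load promotion),
// FloatVT = v16f32, BroadcastStr = "{1to16}" and ZSuffix = "Z".
// For the scalar f32x_info (NumElts = 1, EltVT.Size = 32) the VTName
// computation instead yields "v4f32".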
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
X86VectorVTInfo i128> {
X86VectorVTInfo info512 = i512;
X86VectorVTInfo info256 = i256;
X86VectorVTInfo info128 = i128;
}
def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
v2f64x_info>;
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
ValueType _vt> {
RegisterClass KRC = _krc;
RegisterClass KRCWM = _krcwm;
ValueType KVT = _vt;
}
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
list<dag> ZeroMaskingPattern,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
let isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern, itin>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = MaskingConstraint;
}
// Zero-masking does not add any restrictions to the operand-commuting
// transformation, so it is OK to use IsCommutable instead of IsKCommutable.
let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
ZeroMaskingPattern,
itin>,
EVEX_KZ;
}
// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
// Similar to AVX512_maskable_common, but with scalar types.
multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[], [], [],
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0, bit IsKCommutable = 0,
SDNode Select = vselect> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0), Select,
"$src0 = $dst", itin, IsCommutable, IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
X86selects, "$src0 = $dst", itin, IsCommutable>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1),
vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src1),
X86selects, "", NoItinerary, IsCommutable,
IsKCommutable>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
"$src0 = $dst">;
// Instructions with a mask that put the result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
dag Outs,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
bit IsCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
Pattern, NoItinerary>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern, NoItinerary>, EVEX_K;
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
bit IsCommutable = 0> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
[(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(and _.KRCWM:$mask, RHS), IsCommutable>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm> :
AVX512_maskable_custom_cmp<O, F, Outs,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
AttSrcAsm, IntelSrcAsm, [],[]>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskedRHS,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0, SDNode Select = vselect> :
AVX512_maskable_custom<O, F, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, MaskedRHS,
_.ImmAllZerosV))],
"$src0 = $dst", itin, IsCommutable>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements. It is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
(ins VK16WM:$mask), "",
[(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
(v16i32 immAllOnesV),
(v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
(ins VK8WM:$mask), "",
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
(bc_v8i64 (v16i32 immAllOnesV)),
(bc_v8i64 (v16i32 immAllZerosV))))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
[(set FR64X:$dst, fpimm0)]>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
PatFrag vinsert_insert> {
let ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V;
defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT (bitconvert (From.LdFrag addr:$src2))),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>;
}
}
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vinsert_insert,
SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
let Predicates = p in {
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rr")
To.RC:$src1, From.RC:$src2,
(INSERT_get_vinsert_imm To.RC:$ins)))>;
def : Pat<(vinsert_insert:$ins
(To.VT To.RC:$src1),
(From.VT (bitconvert (From.LdFrag addr:$src2))),
(iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rm")
To.RC:$src1, addr:$src2,
(INSERT_get_vinsert_imm To.RC:$ins)))>;
}
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
vinsert128_insert>, EVEX_V256;
defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo<16, EltVT32, VR512>,
vinsert128_insert>, EVEX_V512;
defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
vinsert256_insert>, VEX_W, EVEX_V512;
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
vinsert128_insert>, VEX_W, EVEX_V256;
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
vinsert128_insert>, VEX_W, EVEX_V512;
defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo<16, EltVT32, VR512>,
vinsert256_insert>, EVEX_V512;
}
}
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
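// Worked example (expository comment, not in the upstream file): with
// NAME = VINSERTF the multiclasses above expand to records such as
//   VINSERTF32x4Zrr    vinsertf32x4  (xmm into zmm)
//   VINSERTF32x4Z256rr vinsertf32x4  (xmm into ymm, requires VLX)
//   VINSERTF64x4Zrr    vinsertf64x4  (ymm into zmm, VEX_W)
// plus the rm/rrk/rrkz/rmk/rmkz forms generated by AVX512_maskable.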
// Codegen patterns with the alternative types.
// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
}
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
multiclass vextract_for_size<int Opcode,
X86VectorVTInfo From, X86VectorVTInfo To,
PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
// vextract_extract). We are interested only in patterns without a mask;
// the intrinsic pattern matches are generated below.
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
(ins From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
[(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
(iPTR imm)))]>,
AVX512AIi8Base, EVEX;
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst|$dst, $src1, $idx}",
[(store (To.VT (vextract_extract:$idx
(From.VT From.RC:$src1), (iPTR imm))),
addr:$dst)]>, EVEX;
let mayStore = 1, hasSideEffects = 0 in
def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, To.KRCWM:$mask,
From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst {${mask}}|"
"$dst {${mask}}, $src1, $idx}",
[]>, EVEX_K, EVEX;
}
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm)),
To.RC:$src0)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrk")
To.RC:$src0, To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm)),
To.ImmAllZerosV)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrkz")
To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
}
// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vextract_extract,
SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
let Predicates = p in {
def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
(To.VT (!cast<Instruction>(InstrStr#"rr")
From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext)))>;
def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
(iPTR imm))), addr:$dst),
(!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
}
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
defm NAME # "32x4Z" : vextract_for_size<Opcode128,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm NAME # "64x4Z" : vextract_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
vextract256_extract,
EXTRACT_get_vextract256_imm>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
vextract128_extract,
EXTRACT_get_vextract128_imm>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm NAME # "32x8Z" : vextract_for_size<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
vextract256_extract,
EXTRACT_get_vextract256_imm>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
}
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
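// Worked example (expository comment, not in the upstream file): with
// NAME = VEXTRACTF the multiclasses above expand to records such as
//   VEXTRACTF32x4Zrr    vextractf32x4  (xmm from zmm)
//   VEXTRACTF32x4Z256rr vextractf32x4  (xmm from ymm, requires VLX)
//   VEXTRACTF64x4Zrr    vextractf64x4  (ymm from zmm, VEX_W)
// plus the mr/mrk memory forms defined in vextract_for_size.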
// extract_subvector codegen patterns with the alternative types.
// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit subvector extract from the first position of a 512-bit vector
// is a subregister copy that needs no instruction.
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
(v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
(v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
(v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
(v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
(v16i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_xmm))>;
// A 256-bit subvector extract from the first position of a 512-bit vector
// is a subregister copy that needs no instruction.
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
(v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
(v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
(v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
(v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v16i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
(v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm))>;
def : Pat<(v32i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
(v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm))>;
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v16i32 (insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v16f32 (insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v32i16 (insert_subvector undef, (v8i16 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
def : Pat<(v64i8 (insert_subvector undef, (v16i8 VR128X:$src), (iPTR 0))),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
// A 256-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i64 (insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v8f64 (insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v16i32 (insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v16f32 (insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v32i16 (insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
}
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
(COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast SrcInfo.FRC:$src),
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
(COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast SrcInfo.FRC:$src),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
let ExeDomain = DestInfo.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
T8PD, EVEX;
defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
(DestInfo.VT (X86VBroadcast
(SrcInfo.ScalarLdFrag addr:$src)))>,
T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
}
def : Pat<(DestInfo.VT (X86VBroadcast
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src))))),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#m) addr:$src)>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src)))),
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask, addr:$src)>;
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src)))),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#mkz)
DestInfo.KRCWM:$mask, addr:$src)>;
}
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
EVEX_V256;
}
}
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
EVEX_V128;
}
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
avx512vl_f64_info>, VEX_W;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
(VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
(VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC> {
let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src),
"vpbroadcast"##_.Suffix, "$src", "$src",
(_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
}
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable_custom<opc, MRMSrcReg,
(outs _.RC:$dst), (ins GR32:$src),
!con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
!con((ins _.KRCWM:$mask), (ins GR32:$src)),
"vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
"$src0 = $dst">, T8PD, EVEX;
def : Pat <(_.VT (OpNode SrcRC:$src)),
(!cast<Instruction>(Name#r)
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
(!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
(!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
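// Expository note (not in the upstream file): the INSERT_SUBREG dance in
// the patterns above widens the GR8/GR16 source into a fresh GR32 (whose
// upper bits are left undefined via IMPLICIT_DEF) because the instruction
// itself is only defined with a GR32 operand; the broadcast then ignores
// the undefined high bits.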
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC,
Subreg>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode,
SrcRC, Subreg>, EVEX_V256;
defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode,
SrcRC, Subreg>, EVEX_V128;
}
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcast_reg<opc, _.info512, OpNode, SrcRC>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcast_reg<opc, _.info256, OpNode, SrcRC>, EVEX_V256;
defm Z128 : avx512_int_broadcast_reg<opc, _.info128, OpNode, SrcRC>, EVEX_V128;
}
}
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
X86VBroadcast, GR64, HasAVX512>, VEX_W;
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
(VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
(VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
// Provide aliases for broadcasts from the same register class that
// automatically do the extract.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
(EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
EVEX_V512;
// Defined separately to avoid redefinition.
defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
}
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
EVEX_V256;
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
EVEX_V128;
}
}
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
avx512vl_i64_info, HasAVX512>, VEX_W;
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
(_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
AVX5128IBase, EVEX;
}
let Predicates = [HasAVX512] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
(VPBROADCASTQZm addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
(VPBROADCASTQZ128m addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
(VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold: isTypeDesirableForOp justifiably returns false for it.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWZ128m addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ128m addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v16f32_info, v4f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
v8i64_info, v4i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
v8f64_info, v4f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4f64 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4i64 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v32i8 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
}
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v8i32x_info, v4i32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v4f32 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v16i8 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v4i64x_info, v2i64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, NoDQI] in {
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
}
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v8i64_info, v2i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
v16i32_info, v8i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
v8f64_info, v2f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF32x8Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
defm Z : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info512, _Src.info128>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info256, _Src.info128>,
EVEX_V256;
}
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info128, _Src.info128>,
EVEX_V128;
}
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
avx512vl_f32_info, avx512vl_f64_info>;
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
(VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
}
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, RegisterClass KRC> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
}
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
let Predicates = [HasCDI] in
defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
let Predicates = [HasCDI, HasVLX] in {
defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
}
}
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
// The index operand in the pattern should really be an integer type. However,
// if we do that and it happens to come from a bitcast, then it becomes
// difficult to find the bitcast needed to convert the index to the
// destination type for the passthru since it will be folded with the bitcast
// of the index operand.
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, EVEX_4V,
AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermi2X _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
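// Illustrative C-like sketch of the VPERMI2* operation modeled above
// ($src1 holds the indices and is overwritten with the result; N is the
// element count, a placeholder here):
//   for (unsigned i = 0; i < N; ++i) {
//     unsigned id = src1[i] % (2 * N);
//     dst[i] = id < N ? src2[id] : src3[id - N];
//   }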
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
EVEX_4V, AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
(bitconvert (_.LdFrag addr:$src3)))), 1>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>,
avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>,
avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>,
avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>, EVEX_V256;
}
}
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
Idx.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
Idx.info256>, EVEX_V256;
}
}
defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d",
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q",
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w",
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b",
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps",
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
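// VPERMT2* performs the same two-table shuffle with the operand roles
// swapped: the indices come from $src2 and the table is $src1:$src3
// (illustrative C-like sketch, placeholders as above):
//   for (unsigned i = 0; i < N; ++i) {
//     unsigned id = src2[i] % (2 * N);
//     dst[i] = id < N ? src1[id] : src3[id - N];
//   }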
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V;
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
}
}
}
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let mayLoad = 1, hasSideEffects = 0 in {
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>,
avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}
multiclass blendmask_bw <bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasBWI] in
defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm Z256 : avx512_blendmask <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_blendmask <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}
defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
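// Per element, the masked blend encoded above is (illustrative C-like
// sketch):
//   dst[i] = mask[i] ? src2[i] : src1[i];
// Note the defs carry empty patterns ([]); these instructions are matched
// elsewhere during instruction selection rather than through an SDNode here.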
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{
defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)>, EVEX_4V;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs VK1:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
let mayLoad = 1 in
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
EVEX_4V, EVEX_B;
}// let isAsmParserOnly = 1, hasSideEffects = 0
let isCodeGenOnly = 1 in {
let isCommutable = 1 in
def rr : AVX512Ii8<0xC2, MRMSrcReg,
(outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
imm:$cc))],
IIC_SSE_ALU_F32S_RR>, EVEX_4V;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
}
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
AVX512XDIi8Base, VEX_W;
}
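// Scalar compare sketch (illustrative): only element 0 participates and a
// single mask bit is produced; the $cc operand selects the predicate.
//   k[0]   = fp_compare(a[0], b[0], predicate);
//   k[...] = 0;                    // upper mask bits cleared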
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> {
let isCommutable = IsCommutable in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rm : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
let isCommutable = IsCommutable in
def rrk : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> :
avx512_icmp_packed<opc, OpcodeStr, OpNode, _, IsCommutable> {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
"|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmbk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>;
defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>;
defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>;
defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
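// Per-element semantics of the packed integer compares defined above
// (illustrative C-like sketch):
//   k[i] = (a[i] == b[i]);         // vpcmpeq*
//   k[i] = (a[i] >  b[i]);         // vpcmpgt*, signed compare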
multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
}
multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds>
: avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
_.RC:$src1, addr:$src2),
NewInf.KRC)>;
}
}
// VPCMPEQB - i8
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQBZ256", [HasBWI, HasVLX]>;
// VPCMPEQW - i16
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
"VPCMPEQWZ", [HasBWI]>;
// VPCMPEQD - i32
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
"VPCMPEQDZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
"VPCMPEQDZ", [HasAVX512]>;
// VPCMPEQQ - i64
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
"VPCMPEQQZ", [HasAVX512]>;
// VPCMPGTB - i8
defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTBZ256", [HasBWI, HasVLX]>;
// VPCMPGTW - i16
defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
"VPCMPGTWZ", [HasBWI]>;
// VPCMPGTD - i32
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
"VPCMPGTDZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
"VPCMPGTDZ", [HasAVX512]>;
// VPCMPGTQ - i64
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
"VPCMPGTQZ", [HasAVX512]>;
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rmi : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V;
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
let mayLoad = 1 in
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
let mayLoad = 1 in
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
}
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> :
avx512_icmp_cc<opc, Suffix, OpNode, _> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)))],
IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
}
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
HasBWI>, EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
HasBWI>, EVEX_CD8<8, CD8VF>;
defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
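// For the vpcmp/vpcmpu family the $cc immediate selects the predicate;
// per the SDM the 3-bit encoding is (listed here for reference):
//   0: EQ   1: LT   2: LE   3: FALSE   4: NE   5: GE   6: GT   7: TRUE
// i.e. k[i] = cmp(a[i], b[i], cc), with the "u" forms comparing unsigned.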
multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
_.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert
(_.LdFrag addr:$src2))),
imm:$cc)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
SDNode OpNode, string InstrStr,
list<Predicate> Preds>
: avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (and (_.KVT _.KRCWM:$mask),
(_.KVT (OpNode (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
imm:$cc)))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
_.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
// VPCMPB - i8
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
"VPCMPBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
"VPCMPBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
"VPCMPBZ256", [HasBWI, HasVLX]>;
// VPCMPW - i16
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
"VPCMPWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
"VPCMPWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
"VPCMPWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
"VPCMPWZ", [HasBWI]>;
// VPCMPD - i32
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
"VPCMPDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
"VPCMPDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
"VPCMPDZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
"VPCMPDZ", [HasAVX512]>;
// VPCMPQ - i64
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
"VPCMPQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
"VPCMPQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
"VPCMPQZ", [HasAVX512]>;
// VPCMPUB - i8
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
"VPCMPUBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
"VPCMPUBZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
"VPCMPUBZ256", [HasBWI, HasVLX]>;
// VPCMPUW - i16
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
"VPCMPUWZ128", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
"VPCMPUWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
"VPCMPUWZ256", [HasBWI, HasVLX]>;
defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
"VPCMPUWZ", [HasBWI]>;
// VPCMPUD - i32
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
"VPCMPUDZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
"VPCMPUDZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
"VPCMPUDZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
"VPCMPUDZ", [HasAVX512]>;
// VPCMPUQ - i64
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
"VPCMPUQZ128", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
"VPCMPUQZ256", [HasAVX512, HasVLX]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
"VPCMPUQZ", [HasAVX512]>;
multiclass avx512_vcmp_common<X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc), 1>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,EVEX_B;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">;
let mayLoad = 1 in {
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
}
}
}
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
// Comparison code form (VCMP[EQ/LT/LE/...]).
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))>, EVEX_B;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc">, EVEX_B;
}
}
multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcmp_common<_.info512>,
avx512_vcmp_sae<_.info512>, EVEX_V512;
}
let Predicates = [HasAVX512,HasVLX] in {
defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
}
}
defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
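// Packed FP compare sketch (illustrative): each element yields one mask
// bit, with the 5-bit $cc immediate selecting one of the 32 AVX predicates
// (e.g. 0 = EQ_OQ):
//   k[i] = fp_compare(a[i], b[i], predicate);
// The rrib/{sae} forms additionally suppress floating-point exceptions.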
multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
string InstrStr, list<Predicate> Preds> {
let Predicates = Preds in {
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpm (_.VT _.RC:$src1),
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
imm:$cc)),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
addr:$src2,
imm:$cc),
NewInf.KRC)>;
}
}
multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
string InstrStr, list<Predicate> Preds>
: avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
let Predicates = Preds in
def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
(_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
(i32 FROUND_NO_EXC))),
(i64 0)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
_.RC:$src2,
imm:$cc),
NewInf.KRC)>;
}
// VCMPPS - f32
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
[HasAVX512]>;
// VCMPPD - f64
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
[HasAVX512, HasVLX]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
[HasAVX512]>;
defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
[HasAVX512]>;
// ----------------------------------------------------------------
// FPClass
// Handle the fpclass instruction: mask = op(reg_scalar, imm)
//                                        op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
}
}
// Handle the fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
//                                        fpclass(reg_vec, mem_vec, imm)
//                                        fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string mem, string broadcast>{
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst|$dst, ${src1}"
##_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>,EVEX_B;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>,
EVEX_B, EVEX_K;
}
multiclass avx512_vector_fpclass_all<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd,
string broadcast>{
let Predicates = [prd] in {
defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info512, "{z}",
broadcast>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info128, "{x}",
broadcast>, EVEX_V128;
defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info256, "{y}",
broadcast>, EVEX_V256;
}
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
}
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
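// The vfpclass immediate is a bitmask of categories to test; the result
// bit is set if the element matches any selected category. Per the SDM
// (listed for reference):
//   bit 0: QNaN   bit 1: +0         bit 2: -0              bit 3: +Inf
//   bit 4: -Inf   bit 5: denormal   bit 6: finite negative bit 7: SNaN
//   k[i] = (classify(src[i]) & imm8) != 0;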
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
let hasSideEffects = 0 in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (vvt (load addr:$src)))]>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store KRC:$src, addr:$dst)]>;
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
string OpcodeStr,
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
}
let Predicates = [HasDQI] in
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
VEX, PD;
let Predicates = [HasAVX512] in
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
VEX, PS;
let Predicates = [HasBWI] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
VEX, PD, VEX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
VEX, PS, VEX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
VEX, XD, VEX_W;
}
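// The four forms defined above correspond to (illustrative C-like sketch,
// shown for the 16-bit kmovw):
//   k_dst = k_src;                 // kk: mask -> mask
//   k_dst = *(uint16_t *)mem;      // km: load
//   *(uint16_t *)mem = k_src;      // mk: store
//   k_dst = (uint16_t)gpr;         // kr: GPR -> mask (rk is the reverse)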
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
(COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
(COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
(COPY_TO_REGCLASS VK64:$src, GR64)>;
// Load/store kreg
let Predicates = [HasDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVBmk addr:$dst, VK8:$src)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(KMOVBkm addr:$src)>;
def : Pat<(store VK4:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(store VK2:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
def : Pat<(store VK1:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
def : Pat<(v2i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
def : Pat<(v4i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store VK1:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
sub_8bit)))>;
def : Pat<(store VK2:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
sub_8bit)))>;
def : Pat<(store VK4:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
sub_8bit)))>;
def : Pat<(store VK8:$src, addr:$dst),
(MOV8mr addr:$dst,
(i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
sub_8bit)))>;
def : Pat<(v8i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
def : Pat<(v2i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
def : Pat<(v4i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
let Predicates = [HasBWI] in {
def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
(KMOVDmk addr:$dst, VK32:$src)>;
def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
(KMOVDkm addr:$src)>;
def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
(KMOVQmk addr:$dst, VK64:$src)>;
def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
(KMOVQkm addr:$src)>;
}
let Predicates = [HasAVX512] in {
multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
(COPY_TO_REGCLASS GR32:$src, maskRC)>;
def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))),
(COPY_TO_REGCLASS maskRC:$src, GR32)>;
}
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit), (i32 1))), VK1)>;
def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit), (i32 1))), VK16)>;
def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src, sub_8bit), (i32 1))), VK8)>;
}
// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
Predicate prd> {
let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set KRC:$dst, (OpNode KRC:$src))]>;
}
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode> {
defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
HasDQI>, VEX, PD;
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
HasAVX512>, VEX, PS;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
HasBWI>, VEX, PD, VEX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
HasBWI>, VEX, PS, VEX_W;
}
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
// KNL does not support KMOVB, so an 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
def : Pat<(vnot VK4:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
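// Illustrative note: all VK* classes live in the same k-registers, so the
// COPY_TO_REGCLASS nodes above are resolved by the register allocator at no
// cost. On KNL, (vnot VK8:$src) therefore selects to a single instruction,
// e.g. (assuming the v8i1 source is in %k1):
//   knotw %k1, %k2          # upper 8 bits of %k2 are don't-care for v8i1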
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
Predicate prd, bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, bit IsCommutable,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
// With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the DQI
// extension the type is legal and the KxxxB instruction is used.
let Predicates = [NoDQI] in
def : Pat<(VOpNode VK8:$src1, VK8:$src2),
(COPY_TO_REGCLASS
(Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
// All types smaller than 8 bits require conversion anyway
def : Pat<(OpNode VK1:$src1, VK1:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
(COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
defm : avx512_binop_pat<and, and, KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or, or, KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, xor, KXORWrr>;
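// Illustrative sketch of the KANDN semantics matched by vandn above,
// dst = ~src1 & src2, with hypothetical register assignments %k1 = $src1
// and %k2 = $src2:
//   kandnw %k2, %k1, %k3    # %k3 = ~%k1 & %k2
// The same promotion applies: under NoDQI the VK8/VK4/VK2/VK1 operands are
// widened to VK16 and the word-sized instruction is used.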
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, RegisterClass KRC, ValueType VT,
RegisterClass KRCSrc, Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_L;
def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
(!cast<Instruction>(NAME##rr)
(COPY_TO_REGCLASS KRCSrc:$src2, KRC),
(COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
}
}
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
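// Illustrative note: KUNPCKBW writes its second source to the low byte and
// its first source to the high byte, while concat_vectors places $src1 in
// the low lanes; hence the swapped operand order in the pattern above.
// Sketch, assuming %k1 = $src1 and %k2 = $src2:
//   kunpckbw %k1, %k2, %k3  # %k3[7:0] = %k1[7:0], %k3[15:8] = %k2[7:0]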
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode, Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
Predicate prdW = HasAVX512> {
defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
VEX, PD;
defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
VEX, PS;
defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
VEX, PS, VEX_W;
defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
VEX, PD, VEX_W;
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode> {
let Predicates = [HasAVX512] in
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
}
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
VEX, TAPD, VEX_W;
let Predicates = [HasDQI] in
defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
VEX, TAPD;
let Predicates = [HasBWI] in {
defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
VEX, TAPD, VEX_W;
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
VEX, TAPD;
}
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (and VK8:$mask,
(OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
(COPY_TO_REGCLASS VK8:$mask, VK16),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
(i8 8)), (i8 8))>;
}
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
AVX512VLVectorVTInfo _> {
def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc),
(i8 8)), (i8 8))>;
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v8i1 (and VK8:$mask,
(OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
(i64 0)),
(KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS VK8:$mask, VK16),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc),
(i8 8)), (i8 8))>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
}
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
multiclass avx512_mask_setop_w<PatFrag Val> {
defm W : avx512_mask_setop<VK16, v16i1, Val>;
defm D : avx512_mask_setop<VK32, v32i1, Val>;
defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
// With base AVX-512, an 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
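// Illustrative note: KSET0* and KSET1* are pseudos; after register
// allocation they are expected to expand to a self-XOR and a self-XNOR
// respectively, e.g.
//   kxorw  %k0, %k0, %k0    # all-zeros mask
//   kxnorw %k0, %k0, %k0    # all-ones mask
// so a constant mask never needs a round trip through a GPR.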
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
def : Pat<(v2i1 (extract_subvector (v4i1 VK4:$src), (iPTR 2))),
(v2i1 (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), (i8 2)),
VK2))>;
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 4))),
(v4i1 (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (i8 4)),
VK4))>;
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
(v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
(v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
}
defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag ld_frag, PatFrag mload,
SDPatternOperator SelectOprr = vselect> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
_.ExeDomain>, EVEX;
def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
"${dst} {${mask}} {z}, $src}"),
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src),
_.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ;
let canFoldAsLoad = 1, isReMaterializable = 1,
SchedRW = [WriteLoad] in
def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
_.ExeDomain>, EVEX;
let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src1),
(_.VT _.RC:$src0))))], _.ExeDomain>,
EVEX, EVEX_K;
let SchedRW = [WriteLoad] in
def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
[(set _.RC:$dst, (_.VT
(vselect _.KRCWM:$mask,
(_.VT (bitconvert (ld_frag addr:$src1))),
(_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
}
let SchedRW = [WriteLoad] in
def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
"${dst} {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
(_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
_.ExeDomain>, EVEX, EVEX_KZ;
}
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
(!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
_.KRCWM:$mask, addr:$ptr)>;
}
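// Illustrative note on the suffix convention above: rr/rm = register/memory
// source, a trailing k = merge-masking (ties $src0 to $dst), kz =
// zero-masking. The trailing mload patterns reuse the masked memory forms
// for masked_load nodes, so e.g. a zero-masked v16f32 load would select to
// something like:
//   vmovups (%rdi), %zmm0 {%k1} {z}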
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
masked_load_aligned512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
masked_load_aligned256>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
masked_load_aligned128>, EVEX_V128;
}
}
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd,
SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V128;
}
}
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag, PatFrag mstore, string Name> {
let hasSideEffects = 0 in {
def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
[], _.ExeDomain>, EVEX, FoldGenData<Name#rr>;
def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
"${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K, FoldGenData<Name#rrk>;
def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData<Name#rrkz>;
}
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K;
def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
(!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
_.KRCWM:$mask, _.RC:$src)>;
}
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
string Name> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
masked_store_unaligned, Name#Z>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
masked_store_unaligned, Name#Z256>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
masked_store_unaligned, Name#Z128>, EVEX_V128;
}
}
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
string Name> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
masked_store_aligned512, Name#Z>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
masked_store_aligned256, Name#Z256>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
masked_store_aligned128, Name#Z128>, EVEX_V128;
}
}
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
HasAVX512>,
avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
HasAVX512, "VMOVAPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512>,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
HasAVX512, "VMOVAPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
"VMOVUPS">,
PS, EVEX_CD8<32, CD8VF>;
defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
"VMOVUPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, "VMOVDQA32">,
PD, EVEX_CD8<32, CD8VF>;
defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, "VMOVDQA64">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
HasBWI, "VMOVDQU8">,
XD, EVEX_CD8<8, CD8VF>;
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
HasBWI, "VMOVDQU16">,
XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512, "VMOVDQU32">,
XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512, "VMOVDQU64">,
XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load into or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
"", []>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
"", []>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
"", []>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
"", []>;
}
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
"", []>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
"", []>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>;
}
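// Illustrative sketch of the expansion, assuming %xmm3 must be spilled while
// VLX is unavailable: the pseudo's operand is widened to its containing ZMM
// register and the ZMM form is emitted, e.g.
//   vmovaps %zmm3, (%rsp)   # writes 64 bytes; the spill slot is sized for it
// The pseudos exist because the EVEX-encoded 128/256-bit forms require VLX.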
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
(bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16f32
(VMOVAPSZrrk
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16i32
(VMOVDQA32Zrrk
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
}
let Predicates = [HasVLX, NoBWI] in {
// 128-bit load/store without BWI.
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
// 256-bit load/store without BWI.
def : Pat<(alignedstore256 (v16i16 VR256X:$src), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore256 (v32i8 VR256X:$src), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
}
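// Illustrative note: an unmasked full-width load or store is element-size
// agnostic, so the dword forms above move v8i16/v16i8 data bit-for-bit;
// BWI only becomes necessary once per-byte or per-word masking is involved.
//   vmovdqu32 %xmm0, (%rdi) # stores 16 bytes regardless of element type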
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of the lower 128 bits of a
// 256-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
def : Pat<(alignedstore (v2f64 (extract_subvector
(v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4f32 (extract_subvector
(v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v2i64 (extract_subvector
(v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4i32 (extract_subvector
(v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v8i16 (extract_subvector
(v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(alignedstore (v16i8 (extract_subvector
(v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v2f64 (extract_subvector
(v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v4f32 (extract_subvector
(v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v2i64 (extract_subvector
(v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v4i32 (extract_subvector
(v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
// Special patterns for storing subvector extracts of the lower 128 bits of a
// 512-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
def : Pat<(alignedstore (v2f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v2i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v4i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v8i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(alignedstore (v16i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v2f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v4f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v2i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v4i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
// Special patterns for storing subvector extracts of the lower 256 bits of a
// 512-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
def : Pat<(alignedstore256 (v4f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
- def : Pat<(alignedstore (v8f32 (extract_subvector
- (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v8f32 (extract_subvector
+ (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v4i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v8i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v16i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore256 (v32i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v4f64 (extract_subvector
(v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v8f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVUPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v4i64 (extract_subvector
(v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v8i32 (extract_subvector
(v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v16i16 (extract_subvector
(v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v32i8 (extract_subvector
(v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
}
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
EVEX;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, EVEX, VEX_W;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>,
EVEX, VEX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))],
IIC_SSE_MOVDQ>, EVEX;
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
EVEX;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (extractelt (v4i32 VR128X:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
(ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
addr:$dst)], IIC_SSE_MOVDQ>,
EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
EVEX, VEX_W;
} // ExeDomain = SSEPackedInt
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
(ins FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))],
IIC_SSE_MOVD_ToGP>, EVEX;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
} // ExeDomain = SSEPackedInt
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode,
X86VectorVTInfo _> {
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))))],
_.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V;
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))),
_.ImmAllZerosV)))],
_.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
(_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))),
(_.VT _.RC:$src0))))],
_.ExeDomain, IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
let canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
_.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
let mayLoad = 1, hasSideEffects = 0 in {
let Constraints = "$src0 = $dst" in
def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|",
"$dst {${mask}}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
}
def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
EVEX;
let mayStore = 1, hasSideEffects = 0 in
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
}
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
PatLeaf ZeroFP, X86VectorVTInfo _> {
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT _.FRC:$src1),
(_.EltVT _.FRC:$src2))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
(COPY_TO_REGCLASS _.FRC:$src2, _.RC),
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
(_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
def : Pat<(_.VT (OpNode _.RC:$src0,
(_.VT (scalar_to_vector
(_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
(_.EltVT _.FRC:$src1),
(_.EltVT ZeroFP))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
(_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
}
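// Illustrative sketch of what the patterns above match, names for exposition
// only: a scalar select on the low bit of a GPR mask merged into a vector,
//   result = { (mask & 1) ? src1 : src2, src0[1], src0[2], src0[3] }
// which maps onto a merge-masked (or zero-masked) vmovss, e.g.
//   kmovw  %eax, %k1
//   vmovss %xmm1, %xmm0, %xmm2 {%k1}  # low lane: %xmm1 if set, else %xmm2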
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
(iPTR 0))),
(iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (bitconvert
(v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
addr:$srcAddr)>;
}
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC,
SubRegIndex subreg> {
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (bitconvert
(v16i32 immAllZerosV))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
(iPTR 0))),
(iPTR 0))))),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
}
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in {
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], NoItinerary>, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrr">;
let Constraints = "$src0 = $dst" in
def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrk">;
def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2),
"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
FoldGenData<"VMOVSSZrrkz">;
def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W,
FoldGenData<"VMOVSDZrr">;
let Constraints = "$src0 = $dst" in
def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
VR128X:$src1, FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrk">;
def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.KRCWM:$mask, VR128X:$src1,
FR64X:$src2),
"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
VEX_W, FoldGenData<"VMOVSDZrrkz">;
}
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended: zero a VR128X register, then do a
// MOVS{S,D} to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
(VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
}
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// Represent the same patterns above but in the form they appear for
// 512-bit types
def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v16f32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
FR32X:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
FR64X:$src)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
addr:$dst),
(VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
// Shuffle with VMOVSS
def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
(VMOVSSZrr (v4i32 VR128X:$src1),
(COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
(VMOVSSZrr (v4f32 VR128X:$src1),
(COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit variants
def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
sub_xmm)>;
def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
sub_xmm)>;
// Shuffle with VMOVSD
def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
sub_xmm)>;
def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
(EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
sub_xmm)>;
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
}
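// Illustrative note: the X86Movss/X86Movsd shuffles take the low element
// from the second operand and the remaining elements from the first, i.e.
// for v4f32:
//   result = { src2[0], src1[1], src1[2], src1[3] }
// which is exactly what register-form vmovss computes, hence the direct
// mappings above.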
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst, (v2i64 (X86vzmovl
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIZrr GR32:$src)>;
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(VMOV64toPQIZrr GR64:$src)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
(VMOVZPQILo2PQIZrr VR128X:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v4i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v16i32 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v8i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
let SchedRW = [WriteLoad] in {
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
(ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
EVEX_CD8<64, CD8VF>;
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag = alignednontemporalstore,
InstrItinClass itin = IIC_SSE_MOVNT> {
let SchedRW = [WriteStore], AddedComplexity = 400 in
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (_.VT _.RC:$src), addr:$dst)],
_.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
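// Each defm above expands to one store per vector length, e.g. VMOVNTDQZmr,
// VMOVNTDQZ256mr and VMOVNTDQZ128mr ("mr" = memory destination, register
// source); the patterns below map the remaining element types onto these
// integer forms.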
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
(VMOVNTDQZmr addr:$dst, VR512:$src)>;
def : Pat<(v8f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v16f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
}
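// There is no dedicated non-temporal FP load instruction, so the aligned
// non-temporal FP loads above (and their VLX-sized counterparts below) are
// also selected to the integer-domain VMOVNTDQA forms.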
let Predicates = [HasVLX], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
(VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(v4f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v8i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v16i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v32i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(v2f64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v4f32 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))),
itins.rm>,
AVX512BIBase, EVEX_4V;
}
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable = 0> :
avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))),
itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
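// Operand-form suffix convention used throughout this file: "rr" is
// register-register, "rm" is register-memory, and "rmb" is a broadcast
// memory operand (EVEX.b), where a single scalar element is loaded and
// splat across the vector.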
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, OpndItins itins,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, OpndItins itins,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
itins, prd, IsCommutable>,
VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>;
}
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>;
}
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
IsCommutable>;
defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
IsCommutable>;
defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode,
OpndItins itins, bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
itins, HasAVX512, IsCommutable>,
avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
itins, HasBWI, IsCommutable>;
}
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(bitconvert (_Src.LdFrag addr:$src2)))),
itins.rm>,
AVX512BIBase, EVEX_4V;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Brdct.BroadcastStr##", $src1",
"$src1, ${src2}"##_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2)))))),
itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SSE_INTALU_ITINS_P, 1>;
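// As an illustration, the VPADD defm above concatenates the element-size and
// vector-length suffixes, yielding definitions such as VPADDBZ128rr,
// VPADDWZ256rm and VPADDQZrmb.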
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SSE_INTALU_ITINS_P, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
SSE_INTALU_ITINS_P, HasBWI, 1>;
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
_SrcVTInfo.info512, _DstVTInfo.info512,
v8i64_info, IsCommutable>,
EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasVLX, prd] in {
defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
_SrcVTInfo.info256, _DstVTInfo.info256,
v4i64x_info, IsCommutable>,
EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
_SrcVTInfo.info128, _DstVTInfo.info128,
v2i64x_info, IsCommutable>,
EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
}
}
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
avx512vl_i32_info, avx512vl_i64_info,
X86pmuldq, HasAVX512, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
avx512vl_i32_info, avx512vl_i64_info,
X86pmuludq, HasAVX512, 1>;
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
avx512vl_i8_info, avx512vl_i8_info,
X86multishift, HasVBMI, 0>, T8PD;
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Src.BroadcastStr##", $src1",
"$src1, ${src2}"##_Src.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
}
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
NoItinerary, IsCommutable>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
(bitconvert (_Src.LdFrag addr:$src2))))>,
EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
}
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
v32i16_info>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
v32i16_info>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
v16i16x_info>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
v16i16x_info>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
v8i16x_info>,
avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
v8i16x_info>, EVEX_V128;
}
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
v64i8_info>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
v32i8x_info>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
v16i8x_info>, EVEX_V128;
}
}
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo _Src,
AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
_Dst.info512, IsCommutable>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
_Dst.info256, IsCommutable>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
_Dst.info128, IsCommutable>, EVEX_V128;
}
}
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
// PMULLQ: use the 512-bit version to implement the 128/256-bit forms when
// VLX is unavailable (NoVLX).
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG
(VPMULLQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
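// The INSERT_SUBREG/EXTRACT_SUBREG idiom above widens the operands into a
// ZMM register (the upper lanes are IMPLICIT_DEF garbage), executes the
// 512-bit instruction, and extracts the low subregister; this is safe
// because the multiply operates lane-wise.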
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable = 0> {
defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
(bitconvert (_.VT _.RC:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
_.RC:$src2)))),
IIC_SSE_BIT_P_RR, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
(bitconvert (_.LdFrag addr:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))))),
IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V;
}
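// Note: the multiclass carries two patterns because the logic op itself is
// performed on the bitconverted i64 vector type, while the masked forms need
// the result expressed in the instruction's native type for the vselect;
// hence the second, bitconverted-back pattern.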
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable = 0> :
avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.i64VT (OpNode _.RC:$src1,
(bitconvert
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
(bitconvert
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))))),
IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo,
bit IsCommutable = 0> {
let Predicates = [HasAVX512] in
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
IsCommutable>,
VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, bit IsCommutable = 0> {
defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
}
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, OpndItins itins,
bit IsCommutable> {
let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2,
(i32 FROUND_CURRENT))),
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2,
(i32 FROUND_CURRENT))),
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
}
}
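// The "_Int" variants above operate on full vectors (preserving the upper
// elements of $src1) and serve the intrinsic forms, while the isCodeGenOnly
// FRC-register versions pattern-match plain scalar arithmetic.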
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
let ExeDomain = _.ExeDomain in
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$rc)), itins.rr, IsCommutable>,
EVEX_B, EVEX_RC;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, SDNode SaeNode,
OpndItins itins, bit IsCommutable> {
let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1, _.RC:$src2)),
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
_.ScalarIntMemCPat:$src2)),
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr> {
let isCommutable = IsCommutable;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
}
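// Rounding-control summary for the scalar FP multiclasses: FROUND_CURRENT
// uses the rounding mode currently in MXCSR, the "rrb"/EVEX_RC form encodes
// a static rounding mode in $rc, and the {sae} form merely suppresses
// exceptions (used by min/max, which do not round).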
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode,
SizeItins itins, bit IsCommutable> {
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
itins.s, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
itins.s, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
itins.d, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
itins.d, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode VecNode, SDNode SaeNode,
SizeItins itins, bit IsCommutable> {
defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
VecNode, SaeNode, itins.s, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
VecNode, SaeNode, itins.d, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
SSE_ALU_ITINS_S, 0>;
// MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use
// the commutable X86fminc and X86fmaxc nodes instead of X86fmin and X86fmax.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr> {
let isCommutable = 1;
}
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
IsCommutable>, EVEX_4V;
let mayLoad = 1 in {
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
EVEX_4V;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))),
itins.rm>, EVEX_4V, EVEX_B;
}
}
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
EVEX_4V, EVEX_B, EVEX_RC;
}
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
EVEX_4V, EVEX_B;
}
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, SizeItins itins,
bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
itins.s, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if the AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
itins.s, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
itins.s, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
SSE_ALU_ITINS_P, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
SSE_MUL_ITINS_P, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
SSE_ALU_ITINS_P, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
SSE_ALU_ITINS_P, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
SSE_ALU_ITINS_P, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
SSE_ALU_ITINS_P, 1>;
}
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
SSE_ALU_ITINS_P, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SSE_ALU_ITINS_P, 1>;
// These patterns catch floating-point selects over bitcasted integer
// logical ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
// Masked register-register logical operations.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2)>;
// Masked register-memory logical operations.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
addr:$src2)>;
// Register-broadcast logical operations.
def : Pat<(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
}
}
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
let Predicates = [HasVLX,HasDQI] in {
// Use packed logical operations for scalar ops.
def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VANDPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VORPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VXORPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
(COPY_TO_REGCLASS (VANDNPDZ128rr
(COPY_TO_REGCLASS FR64X:$src1, VR128X),
(COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VANDPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VORPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VXORPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
(COPY_TO_REGCLASS (VANDNPSZ128rr
(COPY_TO_REGCLASS FR32X:$src1, VR128X),
(COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
}
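// The COPY_TO_REGCLASS trick above runs a 128-bit packed logical op on a
// scalar value; the contents of the upper lanes are irrelevant since only
// element 0 is copied back to the scalar register class.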
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
EVEX_4V, EVEX_B;
}
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1,
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
}
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
EVEX_4V,EVEX_CD8<32, CD8VT1>;
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Define only if the AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let isCommutable = 1 in
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
EVEX_4V;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))>,
EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
// Use the 512-bit version to implement the 128/256-bit forms when VLX is
// unavailable (NoVLX).
multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _, string Suffix> {
def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME # Suffix # "Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src1, _.SubRegIdx),
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src2, _.SubRegIdx)),
_.KRC))>;
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _, string Suffix> {
let Predicates = [HasAVX512] in
defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
}
}
multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
avx512vl_i32_info, "D">;
defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
avx512vl_i64_info, "Q">, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
}
}
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
SDNode OpNode> :
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode>,
avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS;
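// Semantics: vptestm sets mask bit i when (src1[i] & src2[i]) != 0, and
// vptestnm sets it when the AND is zero; the two share opcode bytes and are
// distinguished by the T8PD vs T8XS prefix.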
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rr>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>;
}
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>, EVEX_B;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 (the shift count) is always a 128-bit vector.
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
EVEX_4V;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
string OpcodeStr, SDNode OpNode> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_w<bits<8> opcw,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v32i16_info>, EVEX_V512;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v16i16x_info>, EVEX_V256;
defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v8i16x_info>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
// Use the 512-bit VPSRA/VPSRAI forms to implement v2i64/v4i64 when VLX is
// unavailable (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
VR128X:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
VR128X:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
}
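// VPSRAQ only exists as a 512-bit instruction without VLX (64-bit
// arithmetic shifts are new in AVX-512), hence the widening patterns above.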
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
}
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
avx512vl_i64_info>, VEX_W;
}
// Use the 512-bit version to implement the 128/256-bit forms when VLX is
// unavailable (NoVLX).
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
SDNode OpNode, list<Predicate> p> {
let Predicates = p in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
(_.info256.VT _.info256.RC:$src2))),
(EXTRACT_SUBREG
(!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
(_.info128.VT _.info128.RC:$src2))),
(EXTRACT_SUBREG
(!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
EVEX_V512, VEX_W;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
EVEX_V256, VEX_W;
defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
EVEX_V128, VEX_W;
}
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
avx512_var_shift_w<0x12, "vpsllvw", shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
avx512_var_shift_w<0x11, "vpsravw", sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
avx512_var_shift_w<0x10, "vpsrlvw", srl>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
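// Note: unlike the uniform shifts above (where src2 is a single 128-bit
// count), these variable forms take a per-element shift or rotate amount.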
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
// Special handling for the VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> {
let Predicates = p in {
def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
}
}
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> :
avx512_var_shift_int_lowering<InstrStr, _, p> {
let Predicates = p in {
def : Pat<(_.VT (X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
}
}
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
// Use the 512-bit VPROLV/VPROL forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
}
// Use the 512-bit VPRORV/VPROR forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotates when VLX is unavailable (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORVQZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
sub_xmm)>;
def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORVDZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
(INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
}
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/B/D/Q
//===-------------------------------------------------------------------===//
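// Note: the VPERMD/Q/PS/PD multiclass below only instantiates 512- and
// 256-bit forms; there is no 128-bit variant, presumably because a
// cross-lane element permute is not provided at XMM width.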
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>, EVEX_V256;
}
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
Predicate prd, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [prd] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512 ;
let Predicates = [HasVLX, prd] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256 ;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128 ;
}
}
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
avx512vl_i8_info>;
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
avx512vl_f64_info>, VEX_W;
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
X86VPermi, avx512vl_i64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(Ctrl.VT Ctrl.RC:$src2)))>,
T8PD, EVEX_4V;
defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
let Predicates = [HasAVX512] in {
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
Ctrl.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
Ctrl.info128>, EVEX_V128;
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
Ctrl.info256>, EVEX_V256;
}
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
X86PShufd, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw>, EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base;
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
let Predicates = [HasVLX, HasBWI] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
}
}
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
IIC_SSE_MOV_LH>, EVEX_4V;
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
IIC_SSE_MOV_LH>, EVEX_4V;
let Predicates = [HasAVX512] in {
// MOVLHPS patterns
def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
(VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
(VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
// MOVHLPS patterns
def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
(VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
//===----------------------------------------------------------------------===//
// VMOVHPS/PD and VMOVLPS/PD Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(OpNode _.RC:$src1,
(_.VT (bitconvert
(v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
IIC_SSE_MOV_LH>, EVEX_4V;
}
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
let Predicates = [HasAVX512] in {
// VMOVHPS patterns
def : Pat<(X86Movlhps VR128X:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128X:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVHPD patterns
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPS patterns
def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
(VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
(VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPD patterns
def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1,
(v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
(bc_v2f64 (v4f32 VR128X:$src))),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128X:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
// VMOVLPS patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
addr:$src1),
(VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
def : Pat<(store (v4i32 (X86Movlps
(bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
(VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
// VMOVLPD patterns
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
addr:$src1),
(VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
addr:$src1),
(VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
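// A note on the 132/213/231 forms (where $src1 is tied to $dst): the digits
// give the operand order of the computation a * b + c, e.g.
//   vfmadd213ps zmm1, zmm2, zmm3  ==>  zmm1 = zmm2 * zmm1 + zmm3
//   vfmadd231ps zmm1, zmm2, zmm3  ==>  zmm1 = zmm2 * zmm3 + zmm1
//   vfmadd132ps zmm1, zmm2, zmm3  ==>  zmm1 = zmm1 * zmm3 + zmm2
// which matches the (OpNode ...) operand orders in the patterns below.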
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
_.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
AVX512FMA3Base, EVEX_B;
}
// Additional pattern for folding broadcast nodes in other orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, _.RC:$src2,
(X86VBroadcast (_.ScalarLdFrag addr:$src3))),
_.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
string Suff> {
let Predicates = [HasAVX512] in {
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd > {
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B;
}
// Additional patterns for folding broadcast nodes in other orders.
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mb) _.RC:$src1,
_.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1),
_.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbkz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
string Suff> {
let Predicates = [HasAVX512] in {
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd > {
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src3), _.RC:$src2)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
_.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B;
}
// Additional patterns for folding broadcast nodes in other orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2),
_.RC:$src1)),
(!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string Suff> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
string Suff> {
let Predicates = [HasAVX512] in {
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd > {
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA
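// The *_Int forms below operate on the whole 128-bit register and implement
// the intrinsics (preserving the pass-through upper elements), while the
// isCodeGenOnly FRC forms are what plain scalar code generation selects.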
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
dag RHS_r, dag RHS_m > {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
let isCodeGenOnly = 1, isCommutable = 1 in {
def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_r]>;
def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_m]>;
}// isCodeGenOnly = 1
}
}// Constraints = "$src1 = $dst"
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
// Operands for the intrinsic are in 123 order to preserve passthrough
// semantics.
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
_.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
_.FRC:$src3))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3))))>;
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
(_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
_.RC:$src1, (i32 FROUND_CURRENT))),
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
_.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
(_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
_.RC:$src2, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
}
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
SDNode OpNodeRnds3> {
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">,
EVEX_CD8<32, CD8VT1>, VEX_LIG;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">,
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
}
defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1,
X86FmaddRnds3>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1,
X86FmsubRnds3>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd,
X86FnmaddRnds1, X86FnmaddRnds3>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
X86FnmsubRnds1, X86FnmsubRnds3>;
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52 Bits (IFMA)
//===----------------------------------------------------------------------===//
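// Per 64-bit lane, the low 52 bits of $src2 and $src3 are multiplied as
// unsigned integers to form a 104-bit product; VPMADD52LUQ adds the low 52
// bits of that product to the 64-bit accumulator in $src1/$dst, while
// VPMADD52HUQ adds the high 52 bits.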
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
}
} // Constraints = "$src1 = $dst"
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasIFMA] in {
defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasIFMA] in {
defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
avx512vl_i64_info>, VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//
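// The rr/rm forms below carry no selection patterns (hasSideEffects = 0);
// scalar sint_to_fp/uint_to_fp is matched by the explicit Pat<> definitions
// further down, and the intrinsics go through the isCodeGenOnly *_Int forms.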
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
X86VectorVTInfo DstVT, X86MemOperand x86memop,
PatFrag ld_frag, string asm> {
let hasSideEffects = 0 in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
EVEX_4V;
} // hasSideEffects = 0
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2),
!strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
(i32 FROUND_CURRENT)))]>, EVEX_4V;
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, x86memop:$src2),
!strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
(ld_frag addr:$src2),
(i32 FROUND_CURRENT)))]>, EVEX_4V;
}//isCodeGenOnly = 1
}
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
X86VectorVTInfo DstVT, string asm> {
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
!strconcat(asm,
"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
(i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
}
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
X86VectorVTInfo DstVT, X86MemOperand x86memop,
PatFrag ld_frag, string asm> {
defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
VEX_LIG;
}
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp GR32:$src)),
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
v4f32x_info, i32mem, loadi32,
"cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd{l}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp GR32:$src)),
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
X86VectorVTInfo DstVT, SDNode OpNode, string asm> {
let Predicates = [HasAVX512] in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
EVEX, VEX_LIG;
def rb : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC;
def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
(SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
(i32 FROUND_CURRENT)))]>,
EVEX, VEX_LIG;
} // Predicates = [HasAVX512]
}
// Convert float/double to signed/unsigned int 32/64
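// The unsigned variants (vcvtss2usi/vcvtsd2usi, opcode 0x79) are EVEX-only
// additions; the signed ones share opcode 0x2D with their SSE/AVX
// counterparts.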
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
X86cvts2si, "cvtss2si">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
X86cvts2si, "cvtss2si">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
X86cvts2usi, "cvtss2usi">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
X86cvts2usi, "cvtss2usi">, XS, VEX_W,
EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
X86cvts2si, "cvtsd2si">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
X86cvts2si, "cvtsd2si">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
X86cvts2usi, "cvtsd2usi">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
X86cvts2usi, "cvtsd2usi">, XD, VEX_W,
EVEX_CD8<64, CD8VT1>;
// The SSE versions of these instructions are disabled when AVX512 is
// available. Therefore, the SSE intrinsics are mapped to the AVX512
// instructions.
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
(VCVTSS2SIZrr VR128X:$src)>;
def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
(VCVTSS2SIZrm sse_load_f32:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
(VCVTSS2SI64Zrr VR128X:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
(VCVTSS2SI64Zrm sse_load_f32:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
(VCVTSD2SIZrr VR128X:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
(VCVTSD2SIZrm sse_load_f64:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
(VCVTSD2SI64Zrr VR128X:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
(VCVTSD2SI64Zrm sse_load_f64:$src)>;
} // HasAVX512
let Predicates = [HasAVX512] in {
def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
(VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
(VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
(VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
(VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
(VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
(VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
(VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
(VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
(VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
(VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
} // Predicates = [HasAVX512]
// Patterns used to match the vcvtsi2s{s,d} intrinsic sequences from clang,
// which produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
} // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
SDNode OpNodeRnd, string aliasStr>{
let Predicates = [HasAVX512] in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
let hasSideEffects = 0 in
def rb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[]>, EVEX, EVEX_B;
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
EVEX;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
(!cast<Instruction>(NAME # "rb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
_SrcRC.ScalarMemOp:$src), 0>;
let isCodeGenOnly = 1 in {
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
(i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
def rb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
(i32 FROUND_NO_EXC)))]>,
EVEX,VEX_LIG , EVEX_B;
let mayLoad = 1, hasSideEffects = 0 in
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[]>, EVEX, VEX_LIG;
} // isCodeGenOnly = 1
} //HasAVX512
}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
fp_to_sint, X86cvtts2IntRnd, "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
fp_to_sint, X86cvtts2IntRnd, "{q}">,
VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
fp_to_sint, X86cvtts2IntRnd, "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
fp_to_sint, X86cvtts2IntRnd, "{q}">,
VEX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
fp_to_uint, X86cvtts2UIntRnd, "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
fp_to_uint, X86cvtts2UIntRnd, "{q}">,
XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
fp_to_uint, X86cvtts2UIntRnd, "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
fp_to_uint, X86cvtts2UIntRnd, "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
(VCVTTSS2SIZrr_Int VR128X:$src)>;
def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
(VCVTTSS2SIZrm_Int ssmem:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
(VCVTTSS2SI64Zrr_Int VR128X:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
(VCVTTSS2SI64Zrm_Int ssmem:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
(VCVTTSD2SIZrr_Int VR128X:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
(VCVTTSD2SIZrm_Int sdmem:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
(VCVTTSD2SI64Zrr_Int VR128X:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
(VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//
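// Note the asymmetry below: the narrowing sd2ss direction can round and so
// gets a rounding-control (RC) variant, while the widening ss2sd direction
// is exact and only gets an SAE variant.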
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode> {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
}
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_NO_EXC)))>,
EVEX_4V, VEX_LIG, EVEX_B;
}
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
}
}
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
EVEX_CD8<32, CD8VT1>, XS;
}
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
X86froundRnd, f64x_info, f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)),
(VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
(VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//
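// Each conversion below comes in the usual rr/rm/rmb triad: full-vector
// register, full-vector memory, and an EVEX_B broadcast form that splats a
// single scalar element loaded from memory.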
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
(bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.ScalarMemOp:$src), OpcodeStr,
"${src}"##Broadcast, "${src}"##Broadcast,
(_.VT (OpNode (_Src.VT
(X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
))>, EVEX, EVEX_B;
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
(i32 FROUND_NO_EXC)))>,
EVEX, EVEX_B;
}
// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
"$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
EVEX, EVEX_B, EVEX_RC;
}
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fpextend>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
X86vfpext, "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend>,
EVEX_V256;
}
}
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
X86vfpround, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
}
}
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
PS, EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
let Predicates = [HasVLX] in {
let AddedComplexity = 15 in
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
(VCVTPD2PSZ128rr VR128X:$src)>;
def : Pat<(v2f64 (extloadv2f32 addr:$src)),
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128> {
// No rounding in this op
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
OpNode128, "{1to2}", "", i64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
OpNodeRnd>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
OpNode128, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
}
}
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parcer. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
"{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
}
}
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
EVEX_V256;
}
}
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>,
EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specify the broadcast string, since we take only 2 elements
// from the v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
"{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specify the broadcast string, since we take only 2 elements
// from the v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
"{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
OpNodeRnd>, EVEX_V512;
}
let Predicates = [HasDQI, HasVLX] in {
// We need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in the asm parser. They have the same
// dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
"{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
}
}
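// Instantiations of the conversion multiclasses above. Roughly speaking, each
// defm below expands to a Z (512-bit) rr/rm pair plus, under VLX, Z128/Z256
// variants with the "x"/"y" assembly aliases; e.g. "vcvttpd2dqx (%rax), %xmm0"
// selects the VCVTTPD2DQZ128rm form.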
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP>,
XS, EVEX_CD8<32, CD8VH>;
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
X86VSintToFpRnd>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
X86cvttp2siRnd>,
XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
X86cvttp2siRnd>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
X86cvttp2ui, X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP>,
XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
X86VUintToFpRnd>, XD,
EVEX_CD8<32, CD8VF>;
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
X86cvtp2IntRnd>, XD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
X86cvtp2UIntRnd>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
X86cvtp2UIntRnd>, VEX_W,
PS, EVEX_CD8<64, CD8VF>;
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
X86cvtp2IntRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
X86cvtp2UIntRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
X86cvtp2UIntRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
X86cvttp2siRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
X86cvttp2siRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
X86cvttp2uiRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
X86cvttp2uiRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
X86VSintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
X86VUintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
X86VSintToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
X86VUintToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
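// Without VLX, the 128/256-bit forms of these conversions are unavailable, so
// widen the source into a ZMM register (INSERT_SUBREG into an IMPLICIT_DEF),
// run the 512-bit instruction, and extract the low subregister of the result.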
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src, sub_xmm)))), sub_xmm)>;
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
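// The 128-bit PD->DQ conversions write only the low 64 bits of the result and
// zero the rest, so an explicit vzmovl of the result is redundant and can be
// folded into the conversion itself.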
let Predicates = [HasAVX512, HasVLX] in {
let AddedComplexity = 15 in {
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
(VCVTPD2DQZ128rr VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))),
(VCVTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
(VCVTTPD2DQZ128rr VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))))),
(VCVTTPD2UDQZ128rr VR128X:$src)>;
}
}
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
}
let Predicates = [HasDQI, HasVLX] in {
let AddedComplexity = 15 in {
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
(VCVTQQ2PSZ128rr VR128X:$src)>;
def : Pat<(X86vzmovl (v2f64 (bitconvert
(v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
(VCVTUQQ2PSZ128rr VR128X:$src)>;
}
}
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop, PatFrag ld_frag> {
defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
"vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT _src.RC:$src),
(i32 FROUND_CURRENT))>, T8PD;
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
"vcvtph2ps", "$src", "$src",
(X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
(i32 FROUND_CURRENT))>, T8PD;
}
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
"vcvtph2ps", "{sae}, $src", "$src, {sae}",
(X86cvtph2ps (_src.VT _src.RC:$src),
(i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
}
let Predicates = [HasAVX512] in {
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
X86MemOperand x86memop> {
defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2)),
NoItinerary, 0, 0, X86select>, AVX512AIi8Base;
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2))),
addr:$dst)]>;
let hasSideEffects = 0, mayStore = 1 in
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K;
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
let hasSideEffects = 0 in
defm rb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
(outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
[]>, EVEX_B, AVX512AIi8Base;
}
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
// Use MXCSR.RC for rounding instead of explicitly specifying the default
// rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
// configurations we support (the default). However, falling back to MXCSR is
// more consistent with other instructions, which are always controlled by it.
// It's encoded as 0b100.
def : Pat<(fp_to_f16 FR32X:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
(COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
(COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
(VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
}
// Patterns for matching float to half-float conversion when AVX512 is supported
// but F16C isn't. In that case we have to use 512-bit vectors.
let Predicates = [HasAVX512, NoVLX, NoF16C] in {
def : Pat<(fp_to_f16 FR32X:$src),
(i16 (EXTRACT_SUBREG
(VMOVPDI2DIZrr
(v8i16 (EXTRACT_SUBREG
(VCVTPS2PHZrr
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
sub_xmm), 4), sub_xmm))), sub_16bit))>;
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS
(v4f32 (EXTRACT_SUBREG
(VCVTPH2PSZrr
(INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
(v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
sub_xmm)), sub_xmm)), FR32X))>;
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
(f32 (COPY_TO_REGCLASS
(v4f32 (EXTRACT_SUBREG
(VCVTPH2PSZrr
(VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
sub_xmm), 4)), sub_xmm)), FR32X))>;
}
// Unordered/Ordered scalar fp compare with SAE (suppress-all-exceptions) and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
[], IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
Sched<[WriteFAdd]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss">,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd">,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss">,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd">,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
"ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let Pattern = []<dag> in {
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
"comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
"comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
}
}
defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD;
defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, T8PD, EVEX_B;
}
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
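// Per the Intel SDM, the rcp14/rsqrt14 approximations are accurate to a
// maximum relative error of 2^-14, hence the "14" in the mnemonics.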
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_CURRENT))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
}
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
EVEX_CD8<32, CD8VT1>;
defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
EVEX_CD8<64, CD8VT1>, VEX_W;
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}
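// The ER (*28*) variants refine the approximation to a maximum relative error
// of 2^-28; they are only available with AVX512ER (HasERI).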
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))),
(i32 FROUND_CURRENT))>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
(OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
EVEX, EVEX_B, EVEX_RC;
}
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX;
defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.FloatVT
(bitconvert (_.LdFrag addr:$src))))>, EVEX;
defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, EVEX_B;
}
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
v8f64_info>,
EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v4f32x_info>,
EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, v8f32x_info>,
EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v2f64x_info>,
EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, v4f64x_info>,
EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
}
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd> {
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 FROUND_CURRENT))>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$rc))>,
EVEX_B, EVEX_RC;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
}
}
def : Pat<(_.EltVT (OpNode _.FRC:$src)),
(!cast<Instruction>(NAME#SUFF#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
def : Pat<(_.EltVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#SUFF#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
let Predicates = [HasAVX512] in {
def : Pat<(f32 (X86frsqrt FR32X:$src)),
(COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
(COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
def : Pat<(f32 (X86frcp FR32X:$src)),
(COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
def : Pat<(f32 (X86frcp (load addr:$src))),
(COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
}
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
let Predicates = [HasAVX512] in {
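// The rndscale immediate follows the SSE4.1 round* imm8 encoding: bits 1:0
// select the rounding mode (0=nearest, 1=down, 2=up, 3=truncate), bit 2
// selects MXCSR.RC instead, and bit 3 suppresses the precision (inexact)
// exception. Hence 0x9=floor, 0xA=ceil, 0xB=trunc, 0xC=nearbyint (MXCSR,
// inexact suppressed), and 0x4=rint (MXCSR, inexact reported).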
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x9))), _.FRC)>;
def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xb))), _.FRC)>;
def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x4))), _.FRC)>;
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xc))), _.FRC)>;
}
}
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
X86MemOperand x86memop> {
let ExeDomain = DestInfo.ExeDomain in
defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
EVEX, T8XS;
// For intrinsic pattern matching.
def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
undef)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
DestInfo.ImmAllZerosV)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX;
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>, EVEX, EVEX_K;
}//mayStore = 1, mayLoad = 1, hasSideEffects = 0
}
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo,
PatFrag truncFrag, PatFrag mtruncFrag > {
def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
addr:$dst, SrcInfo.RC:$src)>;
def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
(SrcInfo.VT SrcInfo.RC:$src)),
(!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
truncFrag, mtruncFrag>, EVEX_V128;
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
truncFrag, mtruncFrag>, EVEX_V256;
}
let Predicates = [prd] in
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
truncFrag, mtruncFrag>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
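// Three flavors of narrowing:
// vpmov*   - plain truncation (drop the high bits),
// vpmovs*  - truncate with signed saturation,
// vpmovus* - truncate with unsigned saturation.
// e.g. vpmovusqb narrows qwords to bytes, clamping each to [0, 255].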
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc,
truncstorevi8, masked_truncstorevi8>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc,
truncstorevi16, masked_truncstorevi16>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs,
truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
truncstore_us_vi16, masked_truncstore_us_vi16>;
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc,
truncstorevi32, masked_truncstorevi32>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs,
truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
truncstore_us_vi32, masked_truncstore_us_vi32>;
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
truncstorevi8, masked_truncstorevi8>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
truncstorevi16, masked_truncstorevi16>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs,
truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
truncstore_us_vi16, masked_truncstore_us_vi16>;
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
truncstorevi8, masked_truncstorevi8>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
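// As with the conversions above, emulate the narrow truncates on targets
// without VLX by widening to the 512-bit instruction and extracting the low
// subregister.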
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
(v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
(v4i32 (EXTRACT_SUBREG
(v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
}
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
let ExeDomain = DestInfo.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins x86memop:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (LdFrag addr:$src))>,
EVEX;
}
}
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasBWI] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v16i8x_info, i32mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v16i8x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v8i16x_info, i64mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
}
}
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
v4i32x_info, i128mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
v8i32x_info, i256mem, LdFrag, OpNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
}
}
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">;
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">;
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">;
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">;
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">;
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">;
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">;
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">;
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">;
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
// EXTLOAD patterns, implemented using vpmovzx: an any-extending load may
// legally be lowered as a zero-extending one.
multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
X86VectorVTInfo From, PatFrag LdFrag> {
def : Pat<(To.VT (LdFrag addr:$src)),
(!cast<Instruction>("VPMOVZX"#InstrStr#"rm") addr:$src)>;
def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)),
(!cast<Instruction>("VPMOVZX"#InstrStr#"rmk") To.RC:$src0,
To.KRC:$mask, addr:$src)>;
def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src),
To.ImmAllZerosV)),
(!cast<Instruction>("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask,
addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>;
}
let Predicates = [HasBWI] in {
defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>;
}
let Predicates = [HasVLX, HasAVX512] in {
defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, extloadvi32>;
defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>;
}
let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>;
defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>;
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
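// Fold loads into the sign/zero-extend memory forms. The scalar_to_vector,
// vzmovl, and vzload wrappers below are the different DAG shapes a narrow
// load can take; they all select the same *rm instruction.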
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
// 256-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasBWI] in {
def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
}
}
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
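// A gather merges into its destination: $src1 is tied to $dst, and elements
// whose mask bit is clear keep their previous value. The mask register is
// both consumed and written back ($mask = $mask_wb); hardware clears each
// bit as the corresponding element completes.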
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag GatherNode> {
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
ExeDomain = _.ExeDomain in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
(ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set _.RC:$dst, _.KRCWM:$mask_wb,
(GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
vectoraddr:$src2))]>, EVEX, EVEX_K,
EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
mgatherv16i32>, EVEX_V512;
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
vy256xmem, mgatherv8i32>, EVEX_V256;
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vy128xmem, mgatherv4i64>, EVEX_V256;
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mgatherv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vx64xmem, X86mgatherv2i64>, EVEX_V128;
}
}
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag ScatterNode> {
let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
(ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr#_.Suffix,
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
_.KRCWM:$mask, vectoraddr:$dst))]>,
EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
mscatterv16i32>, EVEX_V512;
defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
vy256xmem, mscatterv8i32>, EVEX_V256;
defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vy128xmem, mscatterv4i64>, EVEX_V256;
defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
vx128xmem, mscatterv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
vx64xmem, mscatterv2i64>, EVEX_V128;
}
}
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
RegisterClass KRC, X86MemOperand memop> {
let Predicates = [HasPFI], hasSideEffects = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
[]>, EVEX, EVEX_K;
}
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
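// The PF0 forms prefetch with a T0 hint (toward L1) and the PF1 forms with a
// T1 hint (toward L2); only elements whose mask bit is set are prefetched.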
// Helper fragments to match sext vXi1 to vXiY.
def v64i1sextv64i8 : PatLeaf<(v64i8
(X86vsext
(v64i1 (X86pcmpgtm
(bc_v64i8 (v16i32 immAllZerosV)),
VR512:$src))))>;
def v32i1sextv32i16 : PatLeaf<(v32i16 (X86vsrai VR512:$src, (i8 15)))>;
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
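// For i16/i32/i64 elements, an arithmetic shift right by (element size - 1)
// replicates the sign bit across the element, which is exactly a sign
// extension of the per-element i1 value; i8 has no vector shift, so a signed
// compare against zero is used instead.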
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}
// Use the 512-bit version to implement the 128/256-bit variants when VLX is
// not available (NoVLX).
multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
X86VectorVTInfo _> {
def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
(X86Info.VT (EXTRACT_SUBREG
(_.VT (!cast<Instruction>(NAME#"Zrr")
(_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
X86Info.SubRegIdx))>;
}
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
}
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
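// vpmovm2* replicates mask bit i across all bits of element i, producing
// all-ones or all-zeros elements.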
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX;
}
// Use the 512-bit version to implement the 128/256-bit variants when VLX is
// not available (NoVLX).
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
X86VectorVTInfo _> {
def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
(_.KVT (COPY_TO_REGCLASS
(!cast<Instruction>(NAME#"Zrr")
(INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
_.RC:$src, _.SubRegIdx)),
_.KRC))>;
}
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
EVEX_V256;
defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
}
}
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
avx512vl_i64_info, HasDQI>, VEX_W;
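// vpmov*2m is the inverse direction: mask bit i is set from the most
// significant (sign) bit of element i.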
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//
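// Compress packs the mask-selected elements into contiguous low positions of
// the destination (or contiguous memory); expand is the inverse, filling the
// mask-selected lanes from contiguous source elements.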
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
let mayStore = 1, hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>;
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
(_.VT _.RC:$src)),
(!cast<Instruction>(NAME#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,
EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,
EVEX, VEX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,
EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
EVEX, VEX_W;
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
(!cast<Instruction>(NAME#_.ZSuffix##rmkz)
_.KRCWM:$mask, addr:$src)>;
def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
(_.VT _.RC:$src0))),
(!cast<Instruction>(NAME#_.ZSuffix##rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>,
EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>,
EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec, imm)
//   reg_vec1 = op(mem_vec, imm)
//   reg_vec1 = op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
// Handle instructions of the form reg_vec1 = op(reg_vec2, imm), {sae}.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128;
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256;
}
}
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2),
(i8 imm:$src3)))>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>;
}
}
// Handle instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_vec, imm)
//   reg_vec1 = op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B;
}
// Handle scalar instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm)
//   reg_vec1 = op(reg_vec2, mem_scalar, imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
}
}
// Handle instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
// Handle scalar instructions of the form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128;
defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256;
}
}
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
let Predicates = [HasBWI] in {
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
}
let Predicates = [HasBWI, HasVLX] in {
defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
}
}
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode>{
let Predicates = [HasAVX512] in {
defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
}
}
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
opcPs, OpNode, prd>, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode = X86Shuf128>{
let Predicates = [HasAVX512] in {
defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
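// VRNDSCALE imm8 encoding: imm[1:0] is the rounding mode (00 nearest, 01
// down, 10 up, 11 truncate), imm[2] = 1 selects MXCSR.RC instead, imm[3]
// suppresses precision exceptions, and imm[7:4] is the scale M (round to a
// multiple of 2^-M). Hence 0x9 = floor, 0xA = ceil, 0xB = trunc, 0xC =
// nearbyint (current mode, no inexact) and 0x4 = rint (current mode, may
// raise inexact), as used in the patterns below.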
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
}
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasAVX512] in {
// Provide a fallback in case the load node used in the broadcast patterns
// above has additional users, which would prevent those patterns from being
// selected.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
(VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
(VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
(VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
(VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
0)>;
}
multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
AVX512AIi8Base, EVEX_4V;
}
defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
multiclass avx512_vpalignr_lowering<X86VectorVTInfo _ , list<Predicate> p>{
let Predicates = p in
def NAME#_.VTName#rri:
Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
(!cast<Instruction>(NAME#_.ZSuffix#rri)
_.RC:$src1, _.RC:$src2, imm:$imm)>;
}
multiclass avx512_vpalignr_lowering_common<AVX512VLVectorVTInfo _>:
avx512_vpalignr_lowering<_.info512, [HasBWI]>,
avx512_vpalignr_lowering<_.info128, [HasBWI, HasVLX]>,
avx512_vpalignr_lowering<_.info256, [HasBWI, HasVLX]>;
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
avx512vl_i8_info, avx512vl_i8_info>,
avx512_vpalignr_lowering_common<avx512vl_i16_info>,
avx512_vpalignr_lowering_common<avx512vl_i32_info>,
avx512_vpalignr_lowering_common<avx512vl_f32_info>,
avx512_vpalignr_lowering_common<avx512vl_i64_info>,
avx512_vpalignr_lowering_common<avx512vl_f64_info>,
EVEX_CD8<8, CD8VF>;
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> :
avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr,
(_.VT (OpNode (X86VBroadcast
(_.ScalarLdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, Predicate prd> {
defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
prd>, VEX_W;
defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
prd>;
}
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, Predicate prd> {
defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>;
defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode> {
defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
HasAVX512>,
avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
HasBWI>;
}
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
// VPABS: Use the 512-bit version to implement the 128/256-bit variants when
// VLX is not available (NoVLX).
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v4i64 (abs VR256X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (abs VR128X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
}
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
}
defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
// VPLZCNT: Use the 512-bit version to implement the 128/256-bit variants when
// VLX is not available (NoVLX).
let Predicates = [HasCDI, NoVLX] in {
def : Pat<(v4i64 (ctlz VR256X:$src)),
(EXTRACT_SUBREG
(VPLZCNTQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v2i64 (ctlz VR128X:$src)),
(EXTRACT_SUBREG
(VPLZCNTQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
def : Pat<(v8i32 (ctlz VR256X:$src)),
(EXTRACT_SUBREG
(VPLZCNTDZrr
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
def : Pat<(v4i32 (ctlz VR128X:$src)),
(EXTRACT_SUBREG
(VPLZCNTDZrr
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
}
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//
multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> {
let Predicates = [HasVPOPCNTDQ] in
defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512;
}
// Use the 512-bit version to implement the 128/256-bit variants.
multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
(EXTRACT_SUBREG
(!cast<Instruction>(NAME # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info256.RC:$src1,
_.info256.SubRegIdx)),
_.info256.SubRegIdx)>;
def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
(EXTRACT_SUBREG
(!cast<Instruction>(NAME # "Zrr")
(INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
_.info128.RC:$src1,
_.info128.SubRegIdx)),
_.info128.SubRegIdx)>;
}
}
defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>,
avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>,
avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
HasAVX512>, XS;
}
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
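// The 128-bit form duplicates the low double and reads only 64 bits from
// memory, so it uses a scalar f64 load (and CD8VH disp8 scaling) instead of
// a full vector load.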
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src)))))>,
EVEX, EVEX_CD8<_.EltSize, CD8VH>;
}
}
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
EVEX_V256;
defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
EVEX_V128;
}
}
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode,
avx512vl_f64_info>, XD, VEX_W;
}
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
let Predicates = [HasVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
SSE_ALU_ITINS_S>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
SSE_ALU_ITINS_S>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
SSE_INTALU_ITINS_P, HasBWI>;
defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
SSE_INTALU_ITINS_P, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
SSE_INTALU_ITINS_P, HasAVX512>;
//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
def mr : AVX512Ii8<opc, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_.EltVT (trunc (assertzext (OpNode (_.VT _.RC:$src1),
imm:$src2)))),
addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD;
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
}
}
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, PD;
let hasSideEffects = 0 in
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX, TAPD, FoldGenData<NAME#rr>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
}
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
RegisterClass GRC> {
let Predicates = [HasDQI] in {
def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GRC:$dst,
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD;
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (_.VT _.RC:$src1),
imm:$src2),addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
}
}
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag> {
def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag> {
let Predicates = [HasBWI] in {
def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
}
}
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, RegisterClass GRC> {
let Predicates = [HasDQI] in {
def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, GRC:$src2, u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
EVEX_4V, TAPD;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
_.ScalarLdFrag>, TAPD;
}
}
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
extloadi8>, TAPD;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
extloadi16>, PD;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
AVX512VLVectorVTInfo VTInfo_FP>{
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
AVX512AIi8Base, EVEX_4V;
}
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
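// These shift each 128-bit lane independently by a byte count, matching the
// legacy SSE pslldq/psrldq semantics within every lane.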
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
def rr : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
def rm : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>;
}
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, Predicate prd>{
let Predicates = [prd] in
defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v32i8x_info>, EVEX_V256;
defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v16i8x_info>, EVEX_V128;
}
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
HasBWI>, AVX512PDIi8Base, EVEX_4V;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
HasBWI>, AVX512PDIi8Base, EVEX_4V;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86VectorVTInfo _dst,
X86VectorVTInfo _src>{
def rr : AVX512BI<opc, MRMSrcReg,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _dst.RC:$dst,(_dst.VT
(OpNode (_src.VT _src.RC:$src1),
(_src.VT _src.RC:$src2))))]>;
def rm : AVX512BI<opc, MRMSrcMem,
(outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _dst.RC:$dst,(_dst.VT
(OpNode (_src.VT _src.RC:$src1),
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>;
}
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info,
v64i8_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info,
v32i8x_info>, EVEX_V256;
defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info,
v16i8x_info>, EVEX_V128;
}
}
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;
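// psadbw sums the absolute differences of the eight unsigned byte pairs in
// each quadword and zero-extends the 16-bit sum into that quadword, hence
// the i64 destination and i8 source vector types above.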
// Transforms that swizzle a VPTERNLOG immediate to enable matching when the
// memory operand is not in its canonical position.
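// Bit i of the VPTERNLOG immediate is the function value for the input
// combination A = i[2], B = i[1], C = i[0], where A, B and C are operands 0,
// 1 and 2 (the digits in the transform names give the new 1-based operand
// order). Permuting the operands therefore permutes the immediate's bits.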
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/4 and 3/6.
uint8_t NewImm = Imm & 0xa5;
if (Imm & 0x02) NewImm |= 0x10;
if (Imm & 0x10) NewImm |= 0x02;
if (Imm & 0x08) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
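// For example, 0xCA encodes A ? B : C; after the 0<->2 swap it becomes 0xD8,
// which encodes C ? B : A.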
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
uint8_t Imm = N->getZExtValue();
// Swap bits 2/4 and 3/5.
uint8_t NewImm = Imm & 0xc3;
if (Imm & 0x04) NewImm |= 0x10;
if (Imm & 0x10) NewImm |= 0x04;
if (Imm & 0x08) NewImm |= 0x20;
if (Imm & 0x20) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
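// For example, 0xCA (A ? B : C) becomes 0xE2 (B ? A : C).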
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
uint8_t Imm = N->getZExtValue();
// Swap bits 1/2 and 5/6.
uint8_t NewImm = Imm & 0x99;
if (Imm & 0x02) NewImm |= 0x04;
if (Imm & 0x04) NewImm |= 0x02;
if (Imm & 0x20) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by moving operand 0 to the end.
uint8_t Imm = N->getZExtValue();
// Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
uint8_t NewImm = Imm & 0x81;
if (Imm & 0x02) NewImm |= 0x04;
if (Imm & 0x04) NewImm |= 0x10;
if (Imm & 0x08) NewImm |= 0x40;
if (Imm & 0x10) NewImm |= 0x02;
if (Imm & 0x20) NewImm |= 0x08;
if (Imm & 0x40) NewImm |= 0x20;
return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
uint8_t Imm = N->getZExtValue();
// Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
uint8_t NewImm = Imm & 0x81;
if (Imm & 0x02) NewImm |= 0x10;
if (Imm & 0x04) NewImm |= 0x02;
if (Imm & 0x08) NewImm |= 0x20;
if (Imm & 0x10) NewImm |= 0x04;
if (Imm & 0x20) NewImm |= 0x40;
if (Imm & 0x40) NewImm |= 0x08;
return getI8Imm(NewImm, SDLoc(N));
}]>;
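// A minimal C sketch of the generic remapping these transforms specialize
// (hypothetical helper, not part of this file): for a permutation p of the
// three operands, the new immediate's bit (a,b,c) is the old immediate's bit
// indexed by the permuted inputs.
//   uint8_t remap_ternlog_imm(uint8_t imm, int p0, int p1, int p2) {
//     uint8_t out = 0;
//     for (int i = 0; i < 8; ++i) {
//       int in[3] = { (i >> 2) & 1, (i >> 1) & 1, i & 1 };  // A, B, C
//       int j = (in[p0] << 2) | (in[p1] << 1) | in[p2];
//       if (imm & (1 << j))
//         out |= 1 << i;
//     }
//     return out;
//   }
// remap_ternlog_imm(imm, 2, 1, 0) reproduces VPTERNLOG321_imm8, and
// remap_ternlog_imm(imm, 1, 0, 2) reproduces VPTERNLOG213_imm8.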
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
(i8 imm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr##", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
// Additional patterns for matching loads in other positions.
def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching zero masking with loads in other
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching masked loads with different
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Additional patterns for matching broadcasts in other positions.
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4))),
(!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching zero masking with broadcasts in other
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
(VPTERNLOG132_imm8 imm:$src4))>;
// Additional patterns for matching masked broadcasts with different
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
(i8 imm:$src4)), _.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
let Predicates = [HasAVX512] in
defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
}
}
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
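// For example (AT&T syntax), "vpternlogd $0xca, %zmm2, %zmm1, %zmm0" computes
// the bitwise select zmm0 = (zmm0 & zmm1) | (~zmm0 & zmm2).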
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT _.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT (bitconvert (_.LdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr##", $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>, EVEX_B;
} // Constraints = "$src1 = $dst"
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.IntVT _.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
}
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4),
(i32 FROUND_NO_EXC))>, EVEX_B;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4),
(i32 FROUND_CURRENT))>;
}
}
multiclass avx512_fixupimm_packed_all<AVX512VLVectorVTInfo _Vec>{
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
AVX512AIi8Base, EVEX_4V, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>,
AVX512AIi8Base, EVEX_4V, EVEX_V128;
defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>,
AVX512AIi8Base, EVEX_4V, EVEX_V256;
}
}
defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
f32x_info, v4i32x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
f64x_info, v2i64x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
// __m128 foo(__m128 A, __m128 B) {
// A[0] += B[0];
// return A;
// }
//
// Previously we generated:
// addss %xmm0, %xmm1
// movss %xmm1, %xmm0
//
// We now generate:
// addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
// __m128 foo(__m128 A, __m128 B) {
// __m128 C = A + B;
// return (__m128) {C[0], A[1], A[2], A[3]};
// }
//
// Previously we generated:
// addps %xmm0, %xmm1
// movss %xmm1, %xmm0
//
// We now generate:
// addss %xmm1, %xmm0
// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
(Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
FR32X:$src))))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32X:$src, VR128X))>;
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
(Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
FR32X:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32X:$src, VR128X))>;
// vector math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
// vector math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
(Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
// extracted masked scalar math op with insert via movss
def : Pat<(X86Movss (v4f32 VR128X:$src1),
(scalar_to_vector
(X86selects VK1WM:$mask,
(Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
FR32X:$src2),
FR32X:$src0))),
(!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
VK1WM:$mask, v4f32:$src1,
(COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
}
}
defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
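// For reference, a hand-resolved sketch (illustrative only) of the first
// pattern that "defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;" above
// instantiates; the !cast resolves to the VADDSSZrr_Int record:
//
//   def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
//             (fadd (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
//                   FR32X:$src))))),
//             (VADDSSZrr_Int v4f32:$dst,
//              (COPY_TO_REGCLASS FR32X:$src, VR128X))>;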
multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX512] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
(Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
FR64X:$src))))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
(Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
FR64X:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// vector math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
// vector math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
(Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
// extracted masked scalar math op with insert via movsd
def : Pat<(X86Movsd (v2f64 VR128X:$src1),
(scalar_to_vector
(X86selects VK1WM:$mask,
(Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
FR64X:$src2),
FR64X:$src0))),
(!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
VK1WM:$mask, v2f64:$src1,
(COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
}
}
defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
Index: head/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
===================================================================
--- head/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td (revision 322854)
+++ head/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td (revision 322855)
@@ -1,2687 +1,275 @@
//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Sandy Bridge to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def SandyBridgeModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and SB can decode 4
// instructions per cycle.
// FIXME: Identify instructions that aren't a single fused micro-op.
let IssueWidth = 4;
let MicroOpBufferSize = 168; // Based on the reorder buffer.
let LoadLatency = 4;
let MispredictPenalty = 16;
// Based on the LSD (loop-stream detector) queue size.
let LoopMicroOpBufferSize = 28;
- // This flag is set to allow the scheduler to assign
- // a default model to unrecognized opcodes.
+ // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
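// Illustrative only: a CPU binds to this model through the generic
// ProcessorModel class from Target.td; the real definition lives in X86.td
// and lists the full feature set (a hypothetical subset is shown here):
//
//   def : ProcessorModel<"sandybridge", SandyBridgeModel,
//                        [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT]>;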
let SchedModel = SandyBridgeModel in {
// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
// Ports 0, 1, and 5 handle all computation.
def SBPort0 : ProcResource<1>;
def SBPort1 : ProcResource<1>;
def SBPort5 : ProcResource<1>;
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores.
def SBPort23 : ProcResource<2>;
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since we don't model the latency of stores, we can
// ignore that.
def SBPort4 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
-def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>;
def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
// 54-entry unified scheduler.
def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
let BufferSize=54;
}
// Integer division issued on port 0.
def SBDivider : ProcResource<1>;
// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
ProcResourceKind ExePort,
int Lat> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
// Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
let Latency = !add(Lat, 4);
}
}
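// As an illustrative expansion (assuming the WriteFAdd/WriteFAddLd pairing
// from X86Schedule.td), "defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;"
// below produces roughly:
//
//   def : WriteRes<WriteFAdd,   [SBPort1]>           { let Latency = 3; }
//   def : WriteRes<WriteFAddLd, [SBPort23, SBPort1]> { let Latency = 7; }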
// A folded store needs a cycle on port 4 for the store data, but it does not
// need an extra port 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort4]>;
def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; }
def : WriteRes<WriteMove, [SBPort015]>;
def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, SBPort015, 1>;
defm : SBWriteResPair<WriteIMul, SBPort1, 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : SBWriteResPair<WriteShift, SBPort05, 1>;
defm : SBWriteResPair<WriteJump, SBPort5, 1>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SBPort15]>;
// This is quite rough; the latency depends on the dividend.
def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
let Latency = 25;
let ResourceCycles = [1, 10];
}
def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
let Latency = 29;
let ResourceCycles = [1, 1, 10];
}
// Scalar and vector floating point.
defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
-defm : SBWriteResPair<WriteFDiv, SBPort0, 24>;
+defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>;
-defm : SBWriteResPair<WriteFSqrt, SBPort0, 14>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
defm : SBWriteResPair<WriteFShuffle, SBPort5, 1>;
defm : SBWriteResPair<WriteFBlend, SBPort05, 1>;
def : WriteRes<WriteFVarBlend, [SBPort0, SBPort5]> {
let Latency = 2;
let ResourceCycles = [1, 1];
}
def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> {
let Latency = 6;
let ResourceCycles = [1, 1, 1];
}
// Vector integer operations.
-defm : SBWriteResPair<WriteVecShift, SBPort5, 1>;
-defm : SBWriteResPair<WriteVecLogic, SBPort5, 1>;
-defm : SBWriteResPair<WriteVecALU, SBPort1, 3>;
+defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
-defm : SBWriteResPair<WriteShuffle, SBPort5, 1>;
+defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
defm : SBWriteResPair<WriteBlend, SBPort15, 1>;
def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
let Latency = 2;
let ResourceCycles = [1, 1];
}
def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> {
let Latency = 6;
let ResourceCycles = [1, 1, 1];
}
-def : WriteRes<WriteMPSAD, [SBPort0,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
+def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1, 1];
}
-def : WriteRes<WriteMPSADLd, [SBPort0,SBPort23,SBPort15]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
+def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1, 1, 1];
}
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
// HADD, HSUB PS/PD
// x,x / v,v,v.
def : WriteRes<WriteFHAdd, [SBPort1]> {
let Latency = 3;
}
// x,m / v,v,m.
def : WriteRes<WriteFHAddLd, [SBPort1, SBPort23]> {
let Latency = 7;
let ResourceCycles = [1, 1];
}
// PHADD|PHSUB (S) W/D.
// v <- v,v.
def : WriteRes<WritePHAdd, [SBPort15]>;
// v <- v,m.
def : WriteRes<WritePHAddLd, [SBPort15, SBPort23]> {
let Latency = 5;
let ResourceCycles = [1, 1];
}
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SBPort015]> {
let Latency = 11;
let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [SBPort015, SBPort23]> {
let Latency = 11;
let ResourceCycles = [3, 1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
let Latency = 11;
let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
let Latency = 11;
let ResourceCycles = [7, 1];
}
// Packed Compare Implicit Length Strings, Return Index
-def : WriteRes<WritePCmpIStrI, [SBPort0]> {
- let Latency = 11;
- let NumMicroOps = 3;
+def : WriteRes<WritePCmpIStrI, [SBPort015]> {
+ let Latency = 3;
let ResourceCycles = [3];
}
-def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> {
- let Latency = 17;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
+def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
+ let Latency = 3;
+ let ResourceCycles = [3, 1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
let Latency = 4;
let ResourceCycles = [8];
}
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
let Latency = 4;
let ResourceCycles = [7, 1];
}
// AES Instructions.
-def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
+def : WriteRes<WriteAESDecEnc, [SBPort015]> {
+ let Latency = 8;
+ let ResourceCycles = [2];
}
-def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> {
- let Latency = 13;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
+def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 1];
}
-def : WriteRes<WriteAESIMC, [SBPort5]> {
- let Latency = 12;
- let NumMicroOps = 2;
+def : WriteRes<WriteAESIMC, [SBPort015]> {
+ let Latency = 8;
let ResourceCycles = [2];
}
-def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> {
- let Latency = 18;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
+def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 1];
}
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
let Latency = 8;
let ResourceCycles = [11];
}
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
let Latency = 8;
let ResourceCycles = [10, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SBPort015]> {
let Latency = 14;
let ResourceCycles = [18];
}
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
let Latency = 14;
let ResourceCycles = [17, 1];
}
def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
def : WriteRes<WriteFence, [SBPort23, SBPort4]>;
def : WriteRes<WriteNop, []>;
// AVX2 is not supported on this architecture, but we should define the basic
// scheduling resources anyway.
defm : SBWriteResPair<WriteFShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteShuffle256, SBPort0, 1>;
defm : SBWriteResPair<WriteVarVecShift, SBPort0, 1>;
-
-// Remaining SNB instrs.
-
-def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>;
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>;
-
-def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>;
-def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>;
-def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>;
-def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>;
-
-def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>;
-def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>;
-def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>;
-def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>;
-def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>;
-
-def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>;
-
-def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>;
-def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>;
-def: InstRW<[SBWriteResGroup4], (instregex "CQO")>;
-def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>;
-def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>;
-
-def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>;
-def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>;
-
-def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> {
- let Latency = 1;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CBW")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMC")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>;
-def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>;
-def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "STC")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>;
-def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>;
-
-def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
- let Latency = 2;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>;
-def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>;
-
-def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>;
-def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>;
-def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>;
-def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>;
-
-def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>;
-
-def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>;
-def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>;
-def: InstRW<[SBWriteResGroup11], (instregex "SCASQ")>;
-def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>;
-
-def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>;
-def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>;
-
-def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>;
-def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>;
-
-def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>;
-def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>;
-
-def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>;
-
-def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>;
-
-def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>;
-def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>;
-
-def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>;
-
-def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>;
-def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>;
-def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>;
-
-def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> {
- let Latency = 3;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>;
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>;
-def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>;
-def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>;
-
-def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
- let Latency = 3;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
-def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>;
-
-def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
- let Latency = 3;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>;
-def: InstRW<[SBWriteResGroup22], (instregex "VEXTRACTPSrr")>;
-
-def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 3;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>;
-def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>;
-def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>;
-def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>;
-
-def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBSWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>;
-def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>;
-
-def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
-def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>;
-def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>;
-
-def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>;
-def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>;
-
-def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>;
-
-def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>;
-def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>;
-
-def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
-def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>;
-
-def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>;
-def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>;
-def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>;
-def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>;
-
-def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>;
-def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>;
-def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
-
-def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
-def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>;
-
-def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
-def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>;
-def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>;
-
-def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>;
-def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>;
-
-def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
-def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
-def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>;
-
-def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>;
-def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>;
-def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>;
-
-def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>;
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
-def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
-
-def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>;
-def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>;
-
-def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>;
-def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>;
-def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>;
-def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>;
-
-def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>;
-def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>;
-
-def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>;
-
-def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>;
-def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>;
-
-def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>;
-def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>;
-
-def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>;
-def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>;
-def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>;
-def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>;
-
-def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>;
-def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>;
-def: InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>;
-def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>;
-def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>;
-
-def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
-
-def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
- let Latency = 5;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
-
-def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
- let Latency = 6;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>;
-def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>;
-
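// (Aside: SBWriteResGroup48's single-uop SBPort23 loads above are modeled one
// cycle slower than the 5-cycle loads of SBWriteResGroup31.)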
-def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>;
-def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>;
-
-def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>;
-
-def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSBrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSDrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSWrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PALIGNR64irm")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSHUFBrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNBrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNDrm64")>;
-def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNWrm64")>;
-
-def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 6;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>;
-def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>;
-def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>;
-def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>;
-def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>;
-
-def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
- let Latency = 6;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>;
-def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_FP64m")>;
-def: InstRW<[SBWriteResGroup53], (instregex "ST_FP80m")>;
-
-def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>;
-def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>;
-
-def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup55], (instregex "CVTPS2PDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "CVTSS2SDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDYrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VCVTSS2SDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VTESTPDrm")>;
-def: InstRW<[SBWriteResGroup55], (instregex "VTESTPSrm")>;
-
-def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>;
-def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>;
-
-def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
-def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>;
-
-def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup58], (instregex "BLENDPDrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "BLENDPSrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPDrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPSrmi")>;
-def: InstRW<[SBWriteResGroup58], (instregex "VINSERTF128rm")>;
-
-def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PABSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PABSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PABSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKSSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKSSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKUSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PACKUSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PADDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PALIGNRrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PAVGBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PAVGWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PBLENDWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PINSRWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMAXUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMINUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFDmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFHWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSHUFLWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSIGNBrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSIGNDrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSIGNWrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PSUBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPABSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPABSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPABSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSWBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPADDWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPALIGNRrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPAVGBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPAVGWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPBLENDWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPINSRWrmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINSDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINUBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINUDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMINUWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFDmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFHWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFLWmi")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNBrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNDrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNWrm128")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSBrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPSUBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHWDrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLBWrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLQDQrm")>;
-def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLWDrm")>;
-
-def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup60], (instregex "PANDNrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "PANDrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "PORrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "PXORrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPANDNrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPANDrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>;
-def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>;
-
-def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort0]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSr")>;
-def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>;
-
-def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup62], (instregex "VERRm")>;
-def: InstRW<[SBWriteResGroup62], (instregex "VERWm")>;
-
-def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup63], (instregex "LODSB")>;
-def: InstRW<[SBWriteResGroup63], (instregex "LODSW")>;
-
-def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>;
-
-def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>;
-def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>;
-
-def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup66], (instregex "FNSTSWm")>;
-
-def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>;
-def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>;
-
-def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>;
-def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>;
-
-def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>;
-def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>;
-
-def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 7;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>;
-
-def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMADDUBSWrm64")>;
-def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMULHRSWrm64")>;
-def: InstRW<[SBWriteResGroup71], (instregex "VTESTPDYrm")>;
-def: InstRW<[SBWriteResGroup71], (instregex "VTESTPSYrm")>;
-
-def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>;
-def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>;
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>;
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>;
-def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>;
-
-def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>;
-def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>;
-
-def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>;
-def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>;
-
-def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0")>;
-def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPSrm0")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPDrm")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPSrm")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm")>;
-def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPSrm")>;
-
-def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrr0")>;
-def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>;
-
-def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup77], (instregex "COMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "COMISSrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "UCOMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "UCOMISSrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VCOMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VCOMISSrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISDrm")>;
-def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISSrm")>;
-
-def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup78], (instregex "PTESTrm")>;
-def: InstRW<[SBWriteResGroup78], (instregex "VPTESTrm")>;
-
-def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
-def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>;
-
-def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDSWrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDWrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBDrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBSWrm64")>;
-def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBWrm64")>;
-
-def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>;
-def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>;
-
-def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>;
-def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>;
-
-def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [2,3];
-}
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSB")>;
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSL")>;
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSQ")>;
-def: InstRW<[SBWriteResGroup83], (instregex "CMPSW")>;
-
-def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
-}
-def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>;
-
-def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
-}
-def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>;
-def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>;
-
-def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,2];
-}
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>;
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>;
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>;
-def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>;
-def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>;
-def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>;
-
-def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,1,2];
-}
-def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>;
-
-def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
- let Latency = 8;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>;
-def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>;
-
-def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>;
-def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>;
-
-def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>;
-def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>;
-
-def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,2];
-}
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>;
-def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>;
-
-def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>;
-def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>;
-
-def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>;
-
-def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup94], (instregex "VPTESTYrm")>;
-
-def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>;
-def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>;
-def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>;
-
-def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,3];
-}
-def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>;
-def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>;
-
-def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>;
-def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>;
-def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>;
-
-def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 6;
- let ResourceCycles = [1,2,3];
-}
-def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>;
-def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
-def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>;
-def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
-
-def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 6;
- let ResourceCycles = [1,2,2,1];
-}
-def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>;
-def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>;
-
-def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 6;
- let ResourceCycles = [1,1,2,1,1];
-}
-def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>;
-def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>;
-
-def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>;
-
-def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
-def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>;
-
-def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 10;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>;
-def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>;
-
-def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>;
-def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>;
-
-def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [3];
-}
-def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>;
-def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>;
-def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>;
-def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>;
-
-def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>;
-def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>;
-def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>;
-def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>;
-
-def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>;
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>;
-def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>;
-
-def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>;
-def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>;
-
-def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 11;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "HSUBPSrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>;
-def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>;
-
-def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> {
- let Latency = 12;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>;
-def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>;
-
-def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 12;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>;
-def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>;
-def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>;
-def: InstRW<[SBWriteResGroup111], (instregex "VMULPSYrm")>;
-
-def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>;
-def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>;
-def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>;
-
-def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
- let Latency = 12;
- let NumMicroOps = 4;
- let ResourceCycles = [1,2,1];
-}
-def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>;
-def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>;
-def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>;
-def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>;
-
-def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> {
- let Latency = 13;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI32m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>;
-def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>;
-
-def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> {
- let Latency = 13;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>;
-def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>;
-
-def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> {
- let Latency = 14;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>;
-def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>;
-
-def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 14;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>;
-
-def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
- let Latency = 14;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSm")>;
-def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>;
-
-def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 15;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI16m")>;
-def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>;
-
-def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> {
- let Latency = 15;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>;
-def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>;
-
-def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 17;
- let NumMicroOps = 4;
- let ResourceCycles = [3,1];
-}
-def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>;
-def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>;
-def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>;
-def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>;
-
-def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> {
- let Latency = 18;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>;
-def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>;
-
-def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 20;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>;
-def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>;
-
-def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> {
- let Latency = 21;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>;
-
-def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 21;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>;
-
-def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> {
- let Latency = 22;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>;
-def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>;
-
-def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> {
- let Latency = 24;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>;
-def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>;
-
-def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 28;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>;
-def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>;
-
-def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> {
- let Latency = 29;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>;
-def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>;
-
-def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> {
- let Latency = 31;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>;
-def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>;
-def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>;
-def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>;
-
-def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 34;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>;
-def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>;
-def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>;
-def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>;
-
-def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
- let Latency = 36;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>;
-def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>;
-
-def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> {
- let Latency = 45;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>;
-def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>;
-
-def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> {
- let Latency = 52;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>;
-def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>;
-
-def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> {
- let Latency = 114;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>;
-
} // SchedModel
Index: head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
===================================================================
--- head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp (revision 322854)
+++ head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp (revision 322855)
@@ -1,167 +1,183 @@
//===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines an interface to a dlltool.exe-compatible driver.
//
//===----------------------------------------------------------------------===//
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Path.h"
#include <string>
#include <vector>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::COFF;
namespace {
enum {
OPT_INVALID = 0,
#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID,
#include "Options.inc"
#undef OPTION
};
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Options.inc"
#undef PREFIX
static const llvm::opt::OptTable::Info infoTable[] = {
#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \
{X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \
X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12},
#include "Options.inc"
#undef OPTION
};
class DllOptTable : public llvm::opt::OptTable {
public:
DllOptTable() : OptTable(infoTable, false) {}
};
} // namespace
std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
// Opens a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
Optional<MemoryBufferRef> openFile(StringRef Path) {
ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path);
if (std::error_code EC = MB.getError()) {
llvm::errs() << "fail openFile: " << EC.message() << "\n";
return None;
}
MemoryBufferRef MBRef = MB.get()->getMemBufferRef();
OwningMBs.push_back(std::move(MB.get())); // take ownership
return MBRef;
}
static MachineTypes getEmulation(StringRef S) {
return StringSwitch<MachineTypes>(S)
.Case("i386", IMAGE_FILE_MACHINE_I386)
.Case("i386:x86-64", IMAGE_FILE_MACHINE_AMD64)
.Case("arm", IMAGE_FILE_MACHINE_ARMNT)
.Default(IMAGE_FILE_MACHINE_UNKNOWN);
}
static std::string getImplibPath(std::string Path) {
SmallString<128> Out = StringRef("lib");
Out.append(Path);
sys::path::replace_extension(Out, ".a");
return Out.str();
}
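// Editor's illustration (not in the original source): getImplibPath("foo.dll")
// returns "libfoo.a" -- "lib" is prepended and the extension replaced by ".a".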
int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
DllOptTable Table;
unsigned MissingIndex;
unsigned MissingCount;
llvm::opt::InputArgList Args =
Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount);
if (MissingCount) {
llvm::errs() << Args.getArgString(MissingIndex) << ": missing argument\n";
return 1;
}
// Handle when no input or output is specified
if (Args.hasArgNoClaim(OPT_INPUT) ||
(!Args.hasArgNoClaim(OPT_d) && !Args.hasArgNoClaim(OPT_l))) {
Table.PrintHelp(outs(), ArgsArr[0], "dlltool", false);
llvm::outs() << "\nTARGETS: i386, i386:x86-64, arm\n";
return 1;
}
if (!Args.hasArgNoClaim(OPT_m) && Args.hasArgNoClaim(OPT_d)) {
llvm::errs() << "error: no target machine specified\n"
<< "supported targets: i386, i386:x86-64, arm\n";
return 1;
}
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
if (!Args.hasArg(OPT_d)) {
llvm::errs() << "no definition file specified\n";
return 1;
}
Optional<MemoryBufferRef> MB = openFile(Args.getLastArg(OPT_d)->getValue());
if (!MB)
return 1;
if (!MB->getBufferSize()) {
llvm::errs() << "definition file empty\n";
return 1;
}
COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN;
if (auto *Arg = Args.getLastArg(OPT_m))
Machine = getEmulation(Arg->getValue());
if (Machine == IMAGE_FILE_MACHINE_UNKNOWN) {
llvm::errs() << "unknown target\n";
return 1;
}
Expected<COFFModuleDefinition> Def =
parseCOFFModuleDefinition(*MB, Machine, true);
if (!Def) {
llvm::errs() << "error parsing definition\n"
<< errorToErrorCode(Def.takeError()).message();
return 1;
}
// Do this after the parser because parseCOFFModuleDefinition sets OutputFile.
if (auto *Arg = Args.getLastArg(OPT_D))
Def->OutputFile = Arg->getValue();
if (Def->OutputFile.empty()) {
llvm::errs() << "no output file specified\n";
return 1;
}
std::string Path = Args.getLastArgValue(OPT_l);
if (Path.empty())
Path = getImplibPath(Def->OutputFile);
+ if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) {
+ for (COFFShortExport& E : Def->Exports) {
+ if (E.isWeak() || (!E.Name.empty() && E.Name[0] == '?'))
+ continue;
+ E.SymbolName = E.Name;
+ // Trim off the trailing decoration. Symbols will always have a
+ // starting prefix here (either _ for cdecl/stdcall, @ for fastcall
+ // or ? for C++ functions). (Vectorcall functions also will end up having
+ // a prefix here, even if they shouldn't.)
+ E.Name = E.Name.substr(0, E.Name.find('@', 1));
+ // By making sure E.SymbolName != E.Name for decorated symbols,
+ // writeImportLibrary writes these symbols with the type
+ // IMPORT_NAME_UNDECORATE.
+ }
+ }
+
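+ // Editor's illustration (not part of this commit): an i386 stdcall export
+ // parsed as "_Func@8" keeps SymbolName = "_Func@8" while Name is trimmed
+ // to "_Func", so writeImportLibrary emits it as IMPORT_NAME_UNDECORATE.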
if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
return 1;
return 0;
}
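A hypothetical invocation exercising the new -k path (flag names are defined
in the Options.td diff below; the file names are made up):

llvm-dlltool -m i386 -d foo.def -l libfoo.a -k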
Index: head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td
===================================================================
--- head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td (revision 322854)
+++ head/contrib/llvm/lib/ToolDrivers/llvm-dlltool/Options.td (revision 322855)
@@ -1,26 +1,26 @@
include "llvm/Option/OptParser.td"
def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target machine">;
def m_long : JoinedOrSeparate<["--"], "machine">, Alias<m>;
def l: JoinedOrSeparate<["-"], "l">, HelpText<"Generate an import lib">;
def l_long : JoinedOrSeparate<["--"], "output-lib">, Alias<l>;
def D: JoinedOrSeparate<["-"], "D">, HelpText<"Specify the input DLL Name">;
def D_long : JoinedOrSeparate<["--"], "dllname">, Alias<D>;
def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">;
def d_long : JoinedOrSeparate<["--"], "input-def">, Alias<d>;
+def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
+def k_alias: Flag<["--"], "kill-at">, Alias<k>;
+
//==============================================================================
// The flags below do nothing. They are defined only for dlltool compatibility.
//==============================================================================
-
-def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
-def k_alias: Flag<["--"], "kill-at">, Alias<k>;
def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">;
def S_alias: JoinedOrSeparate<["--"], "as">, Alias<S>;
def f: JoinedOrSeparate<["-"], "f">, HelpText<"Assembler Flags">;
def f_alias: JoinedOrSeparate<["--"], "as-flags">, Alias<f>;
Index: head/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp (revision 322854)
+++ head/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp (revision 322855)
@@ -1,174 +1,173 @@
//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers atomic intrinsics to non-atomic form for use in a known
// non-preemptible environment.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
#define DEBUG_TYPE "loweratomic"
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
IRBuilder<> Builder(CXI);
Value *Ptr = CXI->getPointerOperand();
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
Res = Builder.CreateInsertValue(Res, Equal, 1);
CXI->replaceAllUsesWith(Res);
CXI->eraseFromParent();
return true;
}
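// Editor's sketch of the rewrite above (illustrative IR, not from the source):
//   %pair = cmpxchg i32* %p, i32 %cmp, i32 %new seq_cst seq_cst
// becomes
//   %orig = load i32, i32* %p
//   %eq = icmp eq i32 %orig, %cmp
//   %res = select i1 %eq, i32 %new, i32 %orig
//   store i32 %res, i32* %p
// with the {i32, i1} result rebuilt from %orig and %eq via insertvalue.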
static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
IRBuilder<> Builder(RMWI);
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Res = nullptr;
switch (RMWI->getOperation()) {
default: llvm_unreachable("Unexpected RMW operation");
case AtomicRMWInst::Xchg:
Res = Val;
break;
case AtomicRMWInst::Add:
Res = Builder.CreateAdd(Orig, Val);
break;
case AtomicRMWInst::Sub:
Res = Builder.CreateSub(Orig, Val);
break;
case AtomicRMWInst::And:
Res = Builder.CreateAnd(Orig, Val);
break;
case AtomicRMWInst::Nand:
Res = Builder.CreateNot(Builder.CreateAnd(Orig, Val));
break;
case AtomicRMWInst::Or:
Res = Builder.CreateOr(Orig, Val);
break;
case AtomicRMWInst::Xor:
Res = Builder.CreateXor(Orig, Val);
break;
case AtomicRMWInst::Max:
Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
Val, Orig);
break;
case AtomicRMWInst::Min:
Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Val),
Orig, Val);
break;
case AtomicRMWInst::UMax:
Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
Val, Orig);
break;
case AtomicRMWInst::UMin:
Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
Orig, Val);
break;
}
Builder.CreateStore(Res, Ptr);
RMWI->replaceAllUsesWith(Orig);
RMWI->eraseFromParent();
return true;
}
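// Editor's illustration (not from the source): "%old = atomicrmw nand i32* %p, i32 %v seq_cst"
// lowers to
//   %old = load i32, i32* %p
//   %and = and i32 %old, %v
//   %res = xor i32 %and, -1        ; CreateNot
//   store i32 %res, i32* %p
// and all uses of the atomicrmw observe the originally loaded value.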
static bool LowerFenceInst(FenceInst *FI) {
FI->eraseFromParent();
return true;
}
static bool LowerLoadInst(LoadInst *LI) {
LI->setAtomic(AtomicOrdering::NotAtomic);
return true;
}
static bool LowerStoreInst(StoreInst *SI) {
SI->setAtomic(AtomicOrdering::NotAtomic);
return true;
}
static bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE;) {
Instruction *Inst = &*DI++;
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
Changed |= LowerFenceInst(FI);
else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Inst))
Changed |= LowerAtomicCmpXchgInst(CXI);
else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Inst))
Changed |= LowerAtomicRMWInst(RMWI);
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (LI->isAtomic())
LowerLoadInst(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->isAtomic())
LowerStoreInst(SI);
}
}
return Changed;
}
static bool lowerAtomics(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F) {
Changed |= runOnBasicBlock(BB);
}
return Changed;
}
PreservedAnalyses LowerAtomicPass::run(Function &F, FunctionAnalysisManager &) {
if (lowerAtomics(F))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
namespace {
class LowerAtomicLegacyPass : public FunctionPass {
public:
static char ID;
LowerAtomicLegacyPass() : FunctionPass(ID) {
initializeLowerAtomicLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
+ // Don't skip optnone functions; atomics still need to be lowered.
FunctionAnalysisManager DummyFAM;
auto PA = Impl.run(F, DummyFAM);
return !PA.areAllPreserved();
}
private:
LowerAtomicPass Impl;
};
}
char LowerAtomicLegacyPass::ID = 0;
INITIALIZE_PASS(LowerAtomicLegacyPass, "loweratomic",
"Lower atomic intrinsics to non-atomic form", false, false)
Pass *llvm::createLowerAtomicPass() { return new LowerAtomicLegacyPass(); }
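Usage sketch (editor's addition): the pass is registered under the name
"loweratomic", so with the legacy pass manager it can be exercised via,
e.g., "opt -loweratomic in.ll -S".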
Index: head/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp (revision 322854)
+++ head/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp (revision 322855)
@@ -1,2281 +1,2287 @@
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are rank = 1, and other values are assigned ranks
// corresponding to the reverse post order traversal of current function
// (starting at 2), which effectively gives values in deep loops higher rank
// than values not in loops.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
using namespace reassociate;
#define DEBUG_TYPE "reassociate"
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
#ifndef NDEBUG
/// Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
Module *M = I->getModule();
dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
dbgs() << "[ ";
Ops[i].Op->printAsOperand(dbgs(), false, M);
dbgs() << ", #" << Ops[i].Rank << "] ";
}
}
#endif
/// Utility class representing a non-constant Xor-operand. We classify
/// non-constant Xor-Operands into two categories:
/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
/// C2)
/// C2.1) The operand is in the form "X | C", where C is a non-zero
/// constant.
/// C2.2) Any operand E that doesn't fall into C1 or C2.1 is viewed as
/// "E | 0".
class llvm::reassociate::XorOpnd {
public:
XorOpnd(Value *V);
bool isInvalid() const { return SymbolicPart == nullptr; }
bool isOrExpr() const { return isOr; }
Value *getValue() const { return OrigVal; }
Value *getSymbolicPart() const { return SymbolicPart; }
unsigned getSymbolicRank() const { return SymbolicRank; }
const APInt &getConstPart() const { return ConstPart; }
void Invalidate() { SymbolicPart = OrigVal = nullptr; }
void setSymbolicRank(unsigned R) { SymbolicRank = R; }
private:
Value *OrigVal;
Value *SymbolicPart;
APInt ConstPart;
unsigned SymbolicRank;
bool isOr;
};
XorOpnd::XorOpnd(Value *V) {
assert(!isa<ConstantInt>(V) && "No ConstantInt");
OrigVal = V;
Instruction *I = dyn_cast<Instruction>(V);
SymbolicRank = 0;
if (I && (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And)) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
const APInt *C;
if (match(V0, PatternMatch::m_APInt(C)))
std::swap(V0, V1);
if (match(V1, PatternMatch::m_APInt(C))) {
ConstPart = *C;
SymbolicPart = V0;
isOr = (I->getOpcode() == Instruction::Or);
return;
}
}
// view the operand as "V | 0"
SymbolicPart = V;
ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
isOr = true;
}
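// Worked examples for the classification above (editor's addition):
//   XorOpnd(X & 0x0F): SymbolicPart = X, ConstPart = 0x0F, isOr = false (C1)
//   XorOpnd(X | 0x01): SymbolicPart = X, ConstPart = 0x01, isOr = true (C2.1)
//   XorOpnd(X): SymbolicPart = X, ConstPart = 0, isOr = true (C2.2)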
/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode &&
(!isa<FPMathOperator>(V) ||
cast<Instruction>(V)->hasUnsafeAlgebra()))
return cast<BinaryOperator>(V);
return nullptr;
}
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
unsigned Opcode2) {
if (V->hasOneUse() && isa<Instruction>(V) &&
(cast<Instruction>(V)->getOpcode() == Opcode1 ||
cast<Instruction>(V)->getOpcode() == Opcode2) &&
(!isa<FPMathOperator>(V) ||
cast<Instruction>(V)->hasUnsafeAlgebra()))
return cast<BinaryOperator>(V);
return nullptr;
}
void ReassociatePass::BuildRankMap(Function &F,
ReversePostOrderTraversal<Function*> &RPOT) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
ValueRankMap[&*I] = ++i;
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
// Traverse basic blocks in ReversePostOrder
for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++i << 16;
// Walk the basic block, adding precomputed ranks for any instructions that
// we cannot move. This ensures that the ranks for these instructions are
// all different in the block.
for (Instruction &I : *BB)
if (mayBeMemoryDependent(I))
ValueRankMap[&I] = ++BBRank;
}
}
unsigned ReassociatePass::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
return 0; // Otherwise it's a global or constant, rank 0.
}
if (unsigned Rank = ValueRankMap[I])
return Rank; // Rank already known?
// If this is an expression, return 1+MAX(rank(LHS), rank(RHS)) so that
// we can reassociate expressions for code motion! Since we do not recurse
// for PHI nodes, we cannot have infinite recursion here, because there
// cannot be loops in the value graph that do not go through PHI nodes.
unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
for (unsigned i = 0, e = I->getNumOperands();
i != e && Rank != MaxRank; ++i)
Rank = std::max(Rank, getRank(I->getOperand(i)));
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
!BinaryOperator::isFNeg(I))
++Rank;
DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
return ValueRankMap[I] = Rank;
}
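// Editor's worked example (not in the original source): for
// "%t = add i32 %x, %y" where %x and %y are arguments with ranks 3 and 4
// (BuildRankMap pre-increments from i = 2), getRank(%t) = 1 + max(3, 4) = 5.
// A not/neg skips the final increment, so X and ~X share a rank.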
// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
void ReassociatePass::canonicalizeOperands(Instruction *I) {
assert(isa<BinaryOperator>(I) && "Expected binary operator.");
assert(I->isCommutative() && "Expected commutative operator.");
Value *LHS = I->getOperand(0);
Value *RHS = I->getOperand(1);
unsigned LHSRank = getRank(LHS);
unsigned RHSRank = getRank(RHS);
if (isa<Constant>(RHS))
return;
if (isa<Constant>(LHS) || RHSRank < LHSRank)
cast<BinaryOperator>(I)->swapOperands();
}
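// Editor's note (illustrative): "add i32 7, %x" becomes "add i32 %x, 7";
// for two non-constant operands, the lower-ranked value ends up on the left.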
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore);
Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
return Res;
}
}
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore);
Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
return Res;
}
}
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
else {
BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags());
return Res;
}
}
/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Type *Ty = Neg->getType();
Constant *NegOne = Ty->isIntOrIntVectorTy() ?
ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Res->setDebugLoc(Neg->getDebugLoc());
return Res;
}
/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
/// even x in Bitwidth-bit arithmetic.
static unsigned CarmichaelShift(unsigned Bitwidth) {
if (Bitwidth < 3)
return Bitwidth - 1;
return Bitwidth - 2;
}
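// Editor's worked example: for Bitwidth = 32 this returns 30, i.e.
// lambda(2^32) = 2^30, so x^(2^30) == 1 (mod 2^32) for every odd x.
// For Bitwidth = 2 it returns 1: 1^2 == 3^2 == 1 (mod 4).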
/// Add the extra weight 'RHS' to the existing weight 'LHS',
/// reducing the combined weight using any special properties of the operation.
/// The existing weight LHS represents the computation X op X op ... op X where
/// X occurs LHS times. The combined weight represents X op X op ... op X with
/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
// If we were working with infinite precision arithmetic then the combined
// weight would be LHS + RHS. But we are using finite precision arithmetic,
// and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
// for nilpotent operations and addition, but not for idempotent operations
// and multiplication), so it is important to correctly reduce the combined
// weight back into range if wrapping would be wrong.
// If RHS is zero then the weight didn't change.
if (RHS.isMinValue())
return;
// If LHS is zero then the combined weight is RHS.
if (LHS.isMinValue()) {
LHS = RHS;
return;
}
// From this point on we know that neither LHS nor RHS is zero.
if (Instruction::isIdempotent(Opcode)) {
// Idempotent means X op X === X, so any non-zero weight is equivalent to a
// weight of 1. Keeping weights at zero or one also means that wrapping is
// not a problem.
assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
return; // Return a weight of 1.
}
if (Instruction::isNilpotent(Opcode)) {
// Nilpotent means X op X === 0, so reduce weights modulo 2.
assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
LHS = 0; // 1 + 1 === 0 modulo 2.
return;
}
if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
// TODO: Reduce the weight by exploiting nsw/nuw?
LHS += RHS;
return;
}
assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
"Unknown associative operation!");
unsigned Bitwidth = LHS.getBitWidth();
// If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
// can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
// bit number x, since either x is odd in which case x^CM = 1, or x is even in
// which case both x^W and x^(W - CM) are zero. By subtracting off multiples
// of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
// which by a happy accident means that they can always be represented using
// Bitwidth bits.
// TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
// the Carmichael number).
if (Bitwidth > 3) {
/// CM - The value of Carmichael's lambda function.
APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
// Any weight W >= Threshold can be replaced with W - CM.
APInt Threshold = CM + Bitwidth;
assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
// For Bitwidth 4 or more the following sum does not overflow.
LHS += RHS;
while (LHS.uge(Threshold))
LHS -= CM;
} else {
// To avoid problems with overflow, do everything the same as above but using
// a larger type.
unsigned CM = 1U << CarmichaelShift(Bitwidth);
unsigned Threshold = CM + Bitwidth;
assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
"Weights not reduced!");
unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
while (Total >= Threshold)
Total -= CM;
LHS = Total;
}
}
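// Editor's worked example for the multiply case: with Bitwidth = 8, CM = 64
// and Threshold = 72; combining weights 70 and 10 gives 80, reduced to
// 80 - 64 = 16, since x^80 == x^16 (mod 2^8) for every 8-bit x.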
typedef std::pair<Value*, APInt> RepeatedValue;
/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
/// original expression is the same as
/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
/// op
/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
/// op
/// ...
/// op
/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
///
/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
///
/// This routine may modify the function, in which case it returns 'true'. The
/// changes it makes may well be destructive, changing the value computed by 'I'
/// to something completely different. Thus if the routine returns 'true' then
/// you MUST either replace I with a new expression computed from the Ops array,
/// or use RewriteExprTree to put the values back in.
///
/// A leaf node is either not a binary operation of the same kind as the root
/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
/// opcode), or is the same kind of binary operator but has a use which either
/// does not belong to the expression, or does belong to the expression but is
/// a leaf node. Every leaf node has at least one use that is a non-leaf node
/// of the expression, while for non-leaf nodes (except for the root 'I') every
/// use is a non-leaf node of the expression.
///
/// For example:
///                  expression graph        node names
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B
///                  / \ / \     |
///                 *   +   *    |    C,  D,  E
///                / \ / \ / \   |
///               +   *          |    F,  G
///
/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
///
/// The expression is maximal: if some instruction is a binary operator of the
/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
/// then the instruction also belongs to the expression, is not a leaf node of
/// it, and its operands also belong to the expression (but may be leaf nodes).
///
/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
/// order to ensure that every non-root node in the expression has *exactly one*
/// use by a non-leaf node of the expression. This destruction means that the
/// caller MUST either replace 'I' with a new expression or use something like
/// RewriteExprTree to put the values back in if the routine indicates that it
/// made a change by returning 'true'.
///
/// In the above example either the right operand of A or the left operand of B
/// will be replaced by undef. If it is B's operand then this gives:
///
///                     +        |        I
///                    / \       |
///                   +   +      |      A,  B - operand of B replaced with undef
///                  / \   \     |
///                 *   +   *    |    C,  D,  E
///                / \ / \ / \   |
///               +   *          |    F,  G
///
/// Note that such undef operands can only be reached by passing through 'I'.
/// For example, if you visit operands recursively starting from a leaf node
/// then you will never see such an undef operand unless you get back to 'I',
/// which requires passing through a phi node.
///
/// Note that this routine may also mutate binary operators of the wrong type
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
static bool LinearizeExprTree(BinaryOperator *I,
SmallVectorImpl<RepeatedValue> &Ops) {
DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
assert(I->isAssociative() && I->isCommutative() &&
"Expected an associative and commutative operation!");
// Visit all operands of the expression, keeping track of their weight (the
// number of paths from the expression root to the operand, or if you like
// the number of times that operand occurs in the linearized expression).
// For example, if I = X + A, where X = A + B, then I, X and B have weight 1
// while A has weight two.
// Worklist of non-leaf nodes (their operands are in the expression too) along
// with their weights, representing a certain number of paths to the operator.
// If an operator occurs in the worklist multiple times then we found multiple
// ways to get to it.
SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
bool Changed = false;
// Leaves of the expression are values that either aren't the right kind of
// operation (e.g. a constant, or a multiply in an add tree), or are, but have
// some uses that are not inside the expression. For example, in I = X + X,
// X = A + B, the value X has two uses (by I) that are in the expression. If
// X has any other uses, for example in a return instruction, then we consider
// X to be a leaf, and won't analyze it further. When we first visit a value,
// if it has more than one use then at first we conservatively consider it to
// be a leaf. Later, as the expression is explored, we may discover some more
// uses of the value from inside the expression. If all uses turn out to be
// from within the expression (and the value is a binary operator of the right
// kind) then the value is no longer considered to be a leaf, and its operands
// are explored.
// Leaves - Keeps track of the set of putative leaves as well as the number of
// paths to each leaf seen so far.
typedef DenseMap<Value*, APInt> LeafMap;
LeafMap Leaves; // Leaf -> Total weight so far.
SmallVector<Value*, 8> LeafOrder; // Ensure deterministic leaf output order.
#ifndef NDEBUG
SmallPtrSet<Value*, 8> Visited; // For sanity checking the iteration scheme.
#endif
while (!Worklist.empty()) {
std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
I = P.first; // We examine the operands of this binary operator.
for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
APInt Weight = P.second; // Number of paths to this operand.
DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
assert(!Op->use_empty() && "No uses, so how did we get to it?!");
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
assert(Visited.insert(Op).second && "Not first visit!");
DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
Worklist.push_back(std::make_pair(BO, Weight));
continue;
}
// Appears to be a leaf. Is the operand already in the set of leaves?
LeafMap::iterator It = Leaves.find(Op);
if (It == Leaves.end()) {
// Not in the leaf map. Must be the first time we saw this operand.
assert(Visited.insert(Op).second && "Not first visit!");
if (!Op->hasOneUse()) {
// This value has uses not accounted for by the expression, so it is
// not safe to modify. Mark it as being a leaf.
DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
LeafOrder.push_back(Op);
Leaves[Op] = Weight;
continue;
}
// No uses outside the expression, try morphing it.
} else {
// Already in the leaf map.
assert(It != Leaves.end() && Visited.count(Op) &&
"In leaf map but not visited!");
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
#if 0 // TODO: Re-enable once PR13021 is fixed.
// The leaf already has one use from inside the expression. As we want
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
I->setOperand(OpIdx, UndefValue::get(I->getType()));
Changed = true;
// If the leaf is a binary operation of the right kind and we now see
// that its multiple original uses were in fact all by nodes belonging
// to the expression, then no longer consider it to be a leaf and add
// its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
Worklist.push_back(std::make_pair(BO, It->second));
Leaves.erase(It);
continue;
}
#endif
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
if (!Op->hasOneUse())
continue;
// No uses outside the expression, try morphing it.
Weight = It->second;
Leaves.erase(It); // Since the value may be morphed below.
}
// At this point we have a value which, first of all, is not a binary
// expression of the right kind, and secondly, is only used inside the
// expression. This means that it can safely be modified. See if we
// can usefully morph it into an expression of the right kind.
assert((!isa<Instruction>(Op) ||
cast<Instruction>(Op)->getOpcode() != Opcode
|| (isa<FPMathOperator>(Op) &&
!cast<Instruction>(Op)->hasUnsafeAlgebra())) &&
"Should have been handled above!");
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
if ((Opcode == Instruction::Mul && BinaryOperator::isNeg(BO)) ||
(Opcode == Instruction::FMul && BinaryOperator::isFNeg(BO))) {
DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
BO = LowerNegateToMultiply(BO);
DEBUG(dbgs() << *BO << '\n');
Worklist.push_back(std::make_pair(BO, Weight));
Changed = true;
continue;
}
// Failed to morph into an expression of the right type. This really is
// a leaf.
DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
LeafOrder.push_back(Op);
Leaves[Op] = Weight;
}
}
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
Value *V = LeafOrder[i];
LeafMap::iterator It = Leaves.find(V);
if (It == Leaves.end())
// Node initially thought to be a leaf wasn't.
continue;
assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
APInt Weight = It->second;
if (Weight.isMinValue())
// Leaf already output or weight reduction eliminated it.
continue;
// Ensure the leaf is only output once.
It->second = 0;
Ops.push_back(std::make_pair(V, Weight));
}
// For nilpotent operations or addition there may be no operands, for example
// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
// in both cases the weight reduces to 0 causing the value to be skipped.
if (Ops.empty()) {
Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
Ops.emplace_back(Identity, APInt(Bitwidth, 1));
}
return Changed;
}
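// Editor's illustration: linearizing I = (A + B) + (A + C), where each inner
// add has a single use, yields Ops = {(A, 2), (B, 1), (C, 1)}; A is reachable
// along two root-to-leaf paths, so its weight is 2.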
/// Now that the operands for this expression tree are
/// linearized and optimized, emit them in-order.
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
// Since our optimizations should never increase the number of operations, the
// new expression can usually be written reusing the existing binary operators
// from the original expression tree, without creating any new instructions,
// though the rewritten expression may have a completely different topology.
// We take care to not change anything if the new expression will be the same
// as the original. If more than trivial changes (like commuting operands)
// were made then we are obliged to clear out any optional subclass data like
// nsw flags.
/// NodesToRewrite - Nodes from the original expression available for writing
/// the new expression into.
SmallVector<BinaryOperator*, 8> NodesToRewrite;
unsigned Opcode = I->getOpcode();
BinaryOperator *Op = I;
/// NotRewritable - The operands being written will be the leaves of the new
/// expression and must not be used as inner nodes (via NodesToRewrite) by
/// mistake. Inner nodes are always reassociable, and usually leaves are not
/// (if they were they would have been incorporated into the expression and so
/// would not be leaves), so most of the time there is no danger of this. But
/// in rare cases a leaf may become reassociable if an optimization kills uses
/// of it, or it may momentarily become reassociable during rewriting (below)
/// due to it being removed as an operand of one of its uses. Ensure that misuse
/// of leaf nodes as inner nodes cannot occur by remembering all of the future
/// leaves and refusing to reuse any of them as inner nodes.
SmallPtrSet<Value*, 8> NotRewritable;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
NotRewritable.insert(Ops[i].Op);
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
// a subexpression.
if (i+2 == Ops.size()) {
Value *NewLHS = Ops[i].Op;
Value *NewRHS = Ops[i+1].Op;
Value *OldLHS = Op->getOperand(0);
Value *OldRHS = Op->getOperand(1);
if (NewLHS == OldLHS && NewRHS == OldRHS)
// Nothing changed, leave it alone.
break;
if (NewLHS == OldRHS && NewRHS == OldLHS) {
// The order of the operands was reversed. Swap them.
DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->swapOperands();
DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
break;
}
// The new operation differs non-trivially from the original. Overwrite
// the old operands with the new ones.
DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewLHS != OldLHS) {
BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(0, NewLHS);
}
if (NewRHS != OldRHS) {
BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
}
DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
break;
}
// Not the last operation. The left-hand side will be a sub-expression
// while the right-hand side will be the current element of Ops.
Value *NewRHS = Ops[i].Op;
if (NewRHS != Op->getOperand(1)) {
DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewRHS == Op->getOperand(0)) {
// The new right-hand side was already present as the left operand. If
// we are lucky then swapping the operands will sort out both of them.
Op->swapOperands();
} else {
// Overwrite with the new right-hand side.
BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
ExpressionChanged = Op;
}
DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
++NumChanged;
}
// Now deal with the left-hand side. If this is already an operation node
// from the original expression then just rewrite the rest of the expression
// into it.
BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
if (BO && !NotRewritable.count(BO)) {
Op = BO;
continue;
}
// Otherwise, grab a spare node from the original expression and use that as
// the left-hand side. If there are no nodes left then the optimizers made
// an expression with more nodes than the original! This usually means that
// they did something stupid but it might mean that the problem was just too
// hard (finding the minimal number of multiplications needed to realize a
// multiplication expression is NP-complete). Whatever the reason, smart or
// stupid, create a new node if there are none left.
BinaryOperator *NewOp;
if (NodesToRewrite.empty()) {
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
}
DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->setOperand(0, NewOp);
DEBUG(dbgs() << "TO: " << *Op << '\n');
ExpressionChanged = Op;
MadeChange = true;
++NumChanged;
Op = NewOp;
}
// If the expression changed non-trivially then clear out all subclass data
// starting from the operator specified in ExpressionChanged, and compactify
// the operators to just before the expression root to guarantee that the
// expression tree is dominated by all of Ops.
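// (The loop below walks the single-use user chain from ExpressionChanged up
// to the expression root I, moving each operator to just before I.)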
if (ExpressionChanged)
do {
// Preserve FastMathFlags.
if (isa<FPMathOperator>(I)) {
FastMathFlags Flags = I->getFastMathFlags();
ExpressionChanged->clearSubclassOptionalData();
ExpressionChanged->setFastMathFlags(Flags);
} else
ExpressionChanged->clearSubclassOptionalData();
if (ExpressionChanged == I)
break;
ExpressionChanged->moveBefore(I);
ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
} while (1);
// Throw away any left over nodes from the original expression.
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
RedoInsts.insert(NodesToRewrite[i]);
}
/// Insert instructions before the instruction pointed to by BI
/// that compute the negative version of the specified value. The negative
/// version of the value is returned, and BI is left pointing at the instruction
/// that should be processed next by the reassociation pass.
/// Also add intermediate instructions to the redo list that are modified while
/// pushing the negates through adds. These will be revisited to see if
/// additional opportunities have been exposed.
static Value *NegateValue(Value *V, Instruction *BI,
SetVector<AssertingVH<Instruction>> &ToRedo) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->getType()->isFPOrFPVectorTy()) {
return ConstantExpr::getFNeg(C);
}
return ConstantExpr::getNeg(C);
}
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
// this means that we turn this:
// X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
// so that later an expression such as Y = 12+X can get reassociated with the
// -12 to eliminate the constants. We assume that instcombine will clean up the
// mess later if we introduce tons of unnecessary negation instructions.
//
if (BinaryOperator *I =
isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
// Push the negates through the add.
I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
if (I->getOpcode() == Instruction::Add) {
I->setHasNoUnsignedWrap(false);
I->setHasNoSignedWrap(false);
}
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
// assured that the neg instructions we just inserted dominate the
// instruction we are about to insert after them.
//
I->moveBefore(BI);
I->setName(I->getName()+".neg");
// Add the intermediate negates to the redo list as processing them later
// could expose more reassociating opportunities.
ToRedo.insert(I);
return I;
}
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (User *U : V->users()) {
if (!BinaryOperator::isNeg(U) && !BinaryOperator::isFNeg(U))
continue;
// We found one! Now we have to make sure that the definition dominates
// this use. We do this by moving it to the entry block (if it is a
// non-instruction value) or right after the definition. These negates will
// be zapped by reassociate later, so we don't need much finesse here.
BinaryOperator *TheNeg = cast<BinaryOperator>(U);
// Verify that the negate is in this function; V might be a constant expr.
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
continue;
BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
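// An invoke terminates its block, so the earliest point at which its result
// is available is the start of its normal destination.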
InsertPt = II->getNormalDest()->begin();
} else {
InsertPt = ++InstInput->getIterator();
}
while (isa<PHINode>(InsertPt)) ++InsertPt;
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
} else {
TheNeg->andIRFlags(BI);
}
ToRedo.insert(TheNeg);
return TheNeg;
}
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
ToRedo.insert(NewNeg);
return NewNeg;
}
/// Return true if we should break up this subtract of X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub) || BinaryOperator::isFNeg(Sub))
return false;
// Don't break up X - undef.
if (isa<UndefValue>(Sub->getOperand(1)))
return false;
// Don't bother to break this up unless either operand is a reassociable add or
// subtract, or the only user of this instruction is one.
Value *V0 = Sub->getOperand(0);
if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) ||
isReassociableOp(V0, Instruction::Sub, Instruction::FSub))
return true;
Value *V1 = Sub->getOperand(1);
if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) ||
isReassociableOp(V1, Instruction::Sub, Instruction::FSub))
return true;
Value *VB = Sub->user_back();
if (Sub->hasOneUse() &&
(isReassociableOp(VB, Instruction::Add, Instruction::FAdd) ||
isReassociableOp(VB, Instruction::Sub, Instruction::FSub)))
return true;
return false;
}
/// If we have (X-Y), and if either X is an add, or if this is only used by an
/// add, transform this into (X+(0-Y)) to promote better reassociation.
static BinaryOperator *
BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
//
Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
New->takeName(Sub);
// Everyone now refers to the add instruction.
Sub->replaceAllUsesWith(New);
New->setDebugLoc(Sub->getDebugLoc());
DEBUG(dbgs() << "Negated: " << *New << '\n');
return New;
}
/// If this is a shift of a reassociable multiply or is used by one, change
/// this into a multiply by a constant to assist with further reassociation.
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
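// For example, a shift amount of 3 yields MulCst == 1 << 3 == 8, so (X << 3)
// is rewritten below as X * 8.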
BinaryOperator *Mul =
BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op.
Mul->takeName(Shl);
// Everyone now refers to the mul instruction.
Shl->replaceAllUsesWith(Mul);
Mul->setDebugLoc(Shl->getDebugLoc());
// We can safely preserve the nuw flag in all cases. It's also safe to turn a
// nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
// handling.
bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap();
bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap();
if (NSW && NUW)
Mul->setHasNoSignedWrap(true);
Mul->setHasNoUnsignedWrap(NUW);
return Mul;
}
/// Scan backwards and forwards among values with the same rank as element i
/// to see if X exists. If X does not exist, return i. This is useful when
/// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
unsigned i, Value *X) {
unsigned XRank = Ops[i].Rank;
unsigned e = Ops.size();
for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
if (Ops[j].Op == X)
return j;
if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
if (Instruction *I2 = dyn_cast<Instruction>(X))
if (I1->isIdenticalTo(I2))
return j;
}
// Scan backwards.
for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) {
if (Ops[j].Op == X)
return j;
if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op))
if (Instruction *I2 = dyn_cast<Instruction>(X))
if (I1->isIdenticalTo(I2))
return j;
}
return i;
}
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
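/// For example, the values [a, b, c] are emitted as (a + b) + c.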
static Value *EmitAddTreeOfValues(Instruction *I,
SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
Value *V1 = Ops.back();
Ops.pop_back();
Value *V2 = EmitAddTreeOfValues(I, Ops);
return CreateAdd(V2, V1, "tmp", I, I);
}
/// If V is an expression tree that is a multiplication sequence,
/// and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
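/// For example, given V == A*B*C and Factor == B, this returns A*C.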
Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO)
return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
SmallVector<ValueEntry, 8> Factors;
Factors.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
RepeatedValue E = Tree[i];
Factors.append(E.second.getZExtValue(),
ValueEntry(getRank(E.first), E.first));
}
bool FoundFactor = false;
bool NeedsNegate = false;
for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
if (Factors[i].Op == Factor) {
FoundFactor = true;
Factors.erase(Factors.begin()+i);
break;
}
// If this is a negative version of this factor, remove it.
if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) {
if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
if (FC1->getValue() == -FC2->getValue()) {
FoundFactor = NeedsNegate = true;
Factors.erase(Factors.begin()+i);
break;
}
} else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) {
if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) {
const APFloat &F1 = FC1->getValueAPF();
APFloat F2(FC2->getValueAPF());
F2.changeSign();
if (F1.compare(F2) == APFloat::cmpEqual) {
FoundFactor = NeedsNegate = true;
Factors.erase(Factors.begin() + i);
break;
}
}
}
}
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
return nullptr;
}
BasicBlock::iterator InsertPt = ++BO->getIterator();
// If this was just a single multiply, remove the multiply and return the only
// remaining operand.
if (Factors.size() == 1) {
RedoInsts.insert(BO);
V = Factors[0].Op;
} else {
RewriteExprTree(BO, Factors);
V = BO;
}
if (NeedsNegate)
V = CreateNeg(V, "neg", &*InsertPt, BO);
return V;
}
/// If V is a single-use multiply, recursively add its operands as factors,
/// otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
SmallVectorImpl<Value*> &Factors) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO) {
Factors.push_back(V);
return;
}
// Otherwise, add the LHS and RHS to the list of factors.
FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
/// This optimizes based on identities. If it can be reduced to a single Value,
/// it is returned, otherwise the Ops list is mutated as necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
// If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
// First, check for X and ~X in the operand list.
assert(i < Ops.size());
if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^.
Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX != i) {
if (Opcode == Instruction::And) // ...&X&~X = 0
return Constant::getNullValue(X->getType());
if (Opcode == Instruction::Or) // ...|X|~X = -1
return Constant::getAllOnesValue(X->getType());
}
}
// Next, check for duplicate pairs of values, which we assume are next to
// each other, due to our sorting criteria.
assert(i < Ops.size());
if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
if (Opcode == Instruction::And || Opcode == Instruction::Or) {
// Drop duplicate values for And and Or.
Ops.erase(Ops.begin()+i);
--i; --e;
++NumAnnihil;
continue;
}
// Drop pairs of values for Xor.
assert(Opcode == Instruction::Xor);
if (e == 2)
return Constant::getNullValue(Ops[0].Op->getType());
// Y ^ X^X -> Y
Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
i -= 1; e -= 2;
++NumAnnihil;
}
}
return nullptr;
}
/// Helper function of CombineXorOpnd(). It creates a bitwise-and
/// instruction with the given two operands, and returns the resulting
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
/// be returned.
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
if (ConstOpnd.isNullValue())
return nullptr;
if (ConstOpnd.isAllOnesValue())
return Opnd;
Instruction *I = BinaryOperator::CreateAnd(
Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
InsertBefore);
I->setDebugLoc(InsertBefore->getDebugLoc());
return I;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
// into "R ^ C", where C would be 0, and R is a symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" are returned
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
// and both "Res" and "ConstOpnd" remain unchanged.
//
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt &ConstOpnd, Value *&Res) {
// Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
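// For example, with c1 == c2 == 6: (x | 6) ^ 6 == x & ~6, and the combined
// constant part c1 ^ c2 becomes 0.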
if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
return false;
if (!Opnd1->getValue()->hasOneUse())
return false;
const APInt &C1 = Opnd1->getConstPart();
if (C1 != ConstOpnd)
return false;
Value *X = Opnd1->getSymbolicPart();
Res = createAndInstr(I, X, ~C1);
// ConstOpnd was C2, now C1 ^ C2.
ConstOpnd ^= C1;
if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
RedoInsts.insert(T);
return true;
}
// Helper function of OptimizeXor(). It tries to simplify
// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
// symbolic value.
//
// If it was successful, true is returned, and the "R" and "C" are returned
// via "Res" and "ConstOpnd", respectively (if the entire expression is
// evaluated to a constant, Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
XorOpnd *Opnd2, APInt &ConstOpnd,
Value *&Res) {
Value *X = Opnd1->getSymbolicPart();
if (X != Opnd2->getSymbolicPart())
return false;
// This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
int DeadInstNum = 1;
if (Opnd1->getValue()->hasOneUse())
DeadInstNum++;
if (Opnd2->getValue()->hasOneUse())
DeadInstNum++;
// Xor-Rule 2:
// (x | c1) ^ (x & c2)
// = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
// = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
// = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
//
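// For example, with 4-bit constants c1 == 12 and c2 == 10: c3 == ~12 ^ 10 == 9,
// so (x | 12) ^ (x & 10) == (x & 9) ^ 12.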
if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
if (Opnd2->isOrExpr())
std::swap(Opnd1, Opnd2);
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3((~C1) ^ C2);
// Do not increase code size!
if (!C3.isNullValue() && !C3.isAllOnesValue()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
Res = createAndInstr(I, X, C3);
ConstOpnd ^= C1;
} else if (Opnd1->isOrExpr()) {
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
//
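// For example, (x | 1) ^ (x | 2) == (x & 3) ^ 3, with c3 == 1 ^ 2 == 3.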
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
// Do not increase code size
if (!C3.isNullValue() && !C3.isAllOnesValue()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
Res = createAndInstr(I, X, C3);
ConstOpnd ^= C3;
} else {
// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
//
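// For example, (x & 12) ^ (x & 10) == x & 6, since AND distributes over XOR.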
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
Res = createAndInstr(I, X, C3);
}
// Put the original operands in the Redo list; hope they will be deleted
// as dead code.
if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
RedoInsts.insert(T);
if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
RedoInsts.insert(T);
return true;
}
/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
/// to a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
Value *ReassociatePass::OptimizeXor(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
return V;
if (Ops.size() == 1)
return nullptr;
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
// Step 1: Convert ValueEntry to XorOpnd
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *V = Ops[i].Op;
const APInt *C;
// TODO: Support non-splat vectors.
if (match(V, PatternMatch::m_APInt(C))) {
ConstOpnd ^= *C;
} else {
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
}
}
// NOTE: From this point on, do *NOT* add or delete elements to or from
// "Opnds". Doing so would invalidate pointers into "Opnds", and hence
// invalidate "OpndPtrs" as well. For the same reason, do not fuse this loop
// with the previous one --- pointers into "Opnds" may be invalidated when new
// elements are added to the vector.
for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
OpndPtrs.push_back(&Opnds[i]);
// Step 2: Sort the Xor-Operands in a way such that the operands containing
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
//
// The purpose is twofold:
// 1) Cluster together the operands sharing the same symbolic-value.
// 2) An operand with a smaller symbolic-value rank is permuted earlier, which
// can potentially shorten the critical path and expose more loop-invariants.
// Note that values' ranks are basically defined in RPO order (FIXME).
// So, if Rank(X) < Rank(Y) < Rank(Z), X is defined earlier than Y, which is
// defined earlier than Z. Permuting "x | 1", "y & 2" and "z" in the order
// X-Y-Z is better than any other order.
std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
[](XorOpnd *LHS, XorOpnd *RHS) {
return LHS->getSymbolicRank() < RHS->getSymbolicRank();
});
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = nullptr;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd *CurrOpnd = OpndPtrs[i];
// The combined value
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
if (!ConstOpnd.isNullValue() &&
CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
else {
CurrOpnd->Invalidate();
continue;
}
}
if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
PrevOpnd = CurrOpnd;
continue;
}
// Step 3.2: When previous and current operands share the same symbolic
// value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
//
if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
// Remove previous operand
PrevOpnd->Invalidate();
if (CV) {
*CurrOpnd = XorOpnd(CV);
PrevOpnd = CurrOpnd;
} else {
CurrOpnd->Invalidate();
PrevOpnd = nullptr;
}
Changed = true;
}
}
// Step 4: Reassemble the Ops
if (Changed) {
Ops.clear();
for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd &O = Opnds[i];
if (O.isInvalid())
continue;
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
if (!ConstOpnd.isNullValue()) {
Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
}
unsigned Sz = Ops.size();
if (Sz == 1)
return Ops.back().Op;
if (Sz == 0) {
assert(ConstOpnd.isNullValue());
return ConstantInt::get(Ty, ConstOpnd);
}
}
return nullptr;
}
/// Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
Value *ReassociatePass::OptimizeAdd(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and -X pairs. If we find any, we
// can simplify expressions like X+-X == 0 and X+~X == -1. While we're at it,
// scan for any duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *TheOp = Ops[i].Op;
// Check to see if we've seen this operand before. If so, we factor all
// instances of the operand together. Due to our sorting criteria, we know
// that these need to be next to each other in the vector.
if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
// Rescan the list, remove all instances of this operand from the expr.
unsigned NumFound = 0;
do {
Ops.erase(Ops.begin()+i);
++NumFound;
} while (i != Ops.size() && Ops[i].Op == TheOp);
DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
++NumFactor;
// Insert a new multiply.
Type *Ty = TheOp->getType();
Constant *C = Ty->isIntOrIntVectorTy() ?
ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
RedoInsts.insert(Mul);
// If every add operand was a duplicate, return the multiply.
if (Ops.empty())
return Mul;
// Otherwise, we had some input that didn't have the dupe, such as
// "A + A + B" -> "A*2 + B". Add the new multiply to the list of
// things being added by this operation.
Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));
--i;
e = Ops.size();
continue;
}
// Check for X and -X or X and ~X in the operand list.
if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isFNeg(TheOp) &&
!BinaryOperator::isNot(TheOp))
continue;
Value *X = nullptr;
if (BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp))
X = BinaryOperator::getNegArgument(TheOp);
else if (BinaryOperator::isNot(TheOp))
X = BinaryOperator::getNotArgument(TheOp);
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
// Remove X and -X from the operand list.
if (Ops.size() == 2 &&
(BinaryOperator::isNeg(TheOp) || BinaryOperator::isFNeg(TheOp)))
return Constant::getNullValue(X->getType());
// Remove X and ~X from the operand list.
if (Ops.size() == 2 && BinaryOperator::isNot(TheOp))
return Constant::getAllOnesValue(X->getType());
Ops.erase(Ops.begin()+i);
if (i < FoundX)
--FoundX;
else
--i; // Need to back up an extra one.
Ops.erase(Ops.begin()+FoundX);
++NumAnnihil;
--i; // Revisit element.
e -= 2; // Removed two elements.
// If we removed X and ~X, append -1 to the operand list.
if (BinaryOperator::isNot(TheOp)) {
Value *V = Constant::getAllOnesValue(X->getType());
Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
e += 1;
}
}
// Scan the operand list, checking to see if there are any common factors
// between operands. Consider something like A*A+A*B*C+D. We would like to
// reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
// To efficiently find this, we count the number of times a factor occurs
// for any ADD operands that are MULs.
DenseMap<Value*, unsigned> FactorOccurrences;
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
BinaryOperator *BOp =
isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
if (!BOp)
continue;
// Compute all of the factors of this added value.
SmallVector<Value*, 8> Factors;
FindSingleUseMultiplyFactors(BOp, Factors);
assert(Factors.size() > 1 && "Bad linearize!");
// Add one to FactorOccurrences for each unique factor in this op.
SmallPtrSet<Value*, 8> Duplicates;
for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
Value *Factor = Factors[i];
if (!Duplicates.insert(Factor).second)
continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
MaxOccVal = Factor;
}
// If Factor is a negative constant, add the negated value as a factor
// because we can percolate the negate out. Watch for minint, which
// cannot be positivified.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) {
if (CI->isNegative() && !CI->isMinValue(true)) {
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
if (!Duplicates.insert(Factor).second)
continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
MaxOccVal = Factor;
}
}
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) {
if (CF->isNegative()) {
APFloat F(CF->getValueAPF());
F.changeSign();
Factor = ConstantFP::get(CF->getContext(), F);
if (!Duplicates.insert(Factor).second)
continue;
unsigned Occ = ++FactorOccurrences[Factor];
if (Occ > MaxOcc) {
MaxOcc = Occ;
MaxOccVal = Factor;
}
}
}
}
}
// If any factor occurred more than one time, we can pull it out.
if (MaxOcc > 1) {
DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
++NumFactor;
// Create a new instruction that uses the MaxOccVal twice. If we don't do
// this, we could run into situations where removing a factor from an
// expression will drop a use of MaxOccVal, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst =
I->getType()->isIntOrIntVectorTy()
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
SmallVector<WeakTrackingVH, 4> NewMulOps;
for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp =
isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul);
if (!BOp)
continue;
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
// The factorized operand may occur several times. Convert them all in
// one fell swoop.
for (unsigned j = Ops.size(); j != i;) {
--j;
if (Ops[j].Op == Ops[i].Op) {
NewMulOps.push_back(V);
Ops.erase(Ops.begin()+j);
}
}
--i;
}
}
// No need for extra uses anymore.
DummyInst->deleteValue();
unsigned NumAddedValues = NewMulOps.size();
Value *V = EmitAddTreeOfValues(I, NewMulOps);
// Now that we have inserted the add tree, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
// A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
(void)NumAddedValues;
if (Instruction *VI = dyn_cast<Instruction>(V))
RedoInsts.insert(VI);
// Create the multiply.
Instruction *V2 = CreateMul(V, MaxOccVal, "tmp", I, I);
// Rerun associate on the multiply in case the inner expression turned into
// a multiply. We want to make sure that we keep things in canonical form.
RedoInsts.insert(V2);
// If every add operand included the factor (e.g. "A*B + A*C"), then the
// entire result expression is just the multiply "A*(B+C)".
if (Ops.empty())
return V2;
// Otherwise, we had some input that didn't have the factor, such as
// "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
// things being added by this operation.
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
return nullptr;
}
/// \brief Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
/// (x*x) -> [(x, 2)]
/// ((x*x)*x) -> [(x, 3)]
/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
SmallVectorImpl<Factor> &Factors) {
// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
// Compute the sum of powers of simplifiable factors.
unsigned FactorPowerSum = 0;
for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
Value *Op = Ops[Idx-1].Op;
// Count the number of occurrences of this value.
unsigned Count = 1;
for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
++Count;
// Track for simplification all factors which occur 2 or more times.
if (Count > 1)
FactorPowerSum += Count;
}
// We can only simplify factors if the sum of the powers of our simplifiable
// factors is 4 or higher. When that is the case, we will *always* have
// a simplification. This is an important invariant to prevent cyclically
// trying to simplify already minimal formations.
if (FactorPowerSum < 4)
return false;
// Now gather the simplifiable factors, removing them from Ops.
FactorPowerSum = 0;
for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
Value *Op = Ops[Idx-1].Op;
// Count the number of occurrences of this value.
unsigned Count = 1;
for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
++Count;
if (Count == 1)
continue;
// Move an even number of occurrences to Factors.
Count &= ~1U;
Idx -= Count;
FactorPowerSum += Count;
Factors.push_back(Factor(Op, Count));
Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
}
// None of the adjustments above should have reduced the sum of factor powers
// below our minimum of '4'.
assert(FactorPowerSum >= 4);
std::stable_sort(Factors.begin(), Factors.end(),
[](const Factor &LHS, const Factor &RHS) {
return LHS.Power > RHS.Power;
});
return true;
}
/// \brief Build a tree of multiplies, computing the product of Ops.
static Value *buildMultiplyTree(IRBuilder<> &Builder,
SmallVectorImpl<Value*> &Ops) {
if (Ops.size() == 1)
return Ops.back();
Value *LHS = Ops.pop_back_val();
do {
if (LHS->getType()->isIntOrIntVectorTy())
LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
else
LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
} while (!Ops.empty());
return LHS;
}
/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
///
/// Given a vector of values raised to various powers, where no two values are
/// equal and the powers are sorted in decreasing order, compute the minimal
/// DAG of multiplies to compute the final product, and return that product
/// value.
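/// For example, the factors [(x, 3), (y, 2)] are computed as ((x*y)*(x*y))*x,
/// using three multiplies instead of the four needed by the naive product.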
Value *
ReassociatePass::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
SmallVectorImpl<Factor> &Factors) {
assert(Factors[0].Power);
SmallVector<Value *, 4> OuterProduct;
for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
Idx < Size && Factors[Idx].Power > 0; ++Idx) {
if (Factors[Idx].Power != Factors[LastIdx].Power) {
LastIdx = Idx;
continue;
}
// We want to multiply across all the factors with the same power so that
// we can raise them to that power as a single entity. Build a mini tree
// for that.
SmallVector<Value *, 4> InnerProduct;
InnerProduct.push_back(Factors[LastIdx].Base);
do {
InnerProduct.push_back(Factors[Idx].Base);
++Idx;
} while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
// Reset the base value of the first factor to the new expression tree.
// We'll remove all the factors with the same power in a second pass.
Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
if (Instruction *MI = dyn_cast<Instruction>(M))
RedoInsts.insert(MI);
LastIdx = Idx;
}
// Unique factors with equal powers -- we've folded them into the first one's
// base.
Factors.erase(std::unique(Factors.begin(), Factors.end(),
[](const Factor &LHS, const Factor &RHS) {
return LHS.Power == RHS.Power;
}),
Factors.end());
// Iteratively collect the base of each factor with an odd power into the
// outer product, and halve each power in preparation for squaring the
// expression.
for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
if (Factors[Idx].Power & 1)
OuterProduct.push_back(Factors[Idx].Base);
Factors[Idx].Power >>= 1;
}
if (Factors[0].Power) {
Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
OuterProduct.push_back(SquareRoot);
OuterProduct.push_back(SquareRoot);
}
if (OuterProduct.size() == 1)
return OuterProduct.front();
Value *V = buildMultiplyTree(Builder, OuterProduct);
return V;
}
Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
return nullptr;
// Try to turn linear trees of multiplies without other uses of the
// intermediate stages into minimal multiply DAGs with perfect sub-expression
// re-use.
SmallVector<Factor, 4> Factors;
if (!collectMultiplyFactors(Ops, Factors))
return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
// The reassociate transformation for FP operations is performed only
// if unsafe algebra is permitted by FastMathFlags. Propagate those flags
// to the newly generated operations.
if (auto FPI = dyn_cast<FPMathOperator>(I))
Builder.setFastMathFlags(FPI->getFastMathFlags());
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
if (Ops.empty())
return V;
ValueEntry NewEntry = ValueEntry(getRank(V), V);
Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
return nullptr;
}
Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
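// For example, if the linearized operand list for an add ends with the
// constants 4 and 7, they are folded here into the single constant 11.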
Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
Constant *C = cast<Constant>(Ops.pop_back_val().Op);
Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
}
// If there was nothing but constants then we are done.
if (Ops.empty())
return Cst;
// Put the combined constant back at the end of the operand list, except if
// there is no point. For example, an add of 0 gets dropped here, while a
// multiplication by zero turns the whole expression into zero.
if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
return Cst;
Ops.push_back(ValueEntry(0, Cst));
}
if (Ops.size() == 1) return Ops[0].Op;
// Handle destructive annihilation due to identities between elements in the
// argument list here.
unsigned NumOps = Ops.size();
switch (Opcode) {
default: break;
case Instruction::And:
case Instruction::Or:
if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
return Result;
break;
case Instruction::Xor:
if (Value *Result = OptimizeXor(I, Ops))
return Result;
break;
case Instruction::Add:
case Instruction::FAdd:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
break;
case Instruction::Mul:
case Instruction::FMul:
if (Value *Result = OptimizeMul(I, Ops))
return Result;
break;
}
if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
return nullptr;
}
// Remove dead instructions and if any operands are trivially dead add them to
// Insts so they will be removed as well.
void ReassociatePass::RecursivelyEraseDeadInsts(
Instruction *I, SetVector<AssertingVH<Instruction>> &Insts) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
SmallVector<Value *, 4> Ops(I->op_begin(), I->op_end());
ValueRankMap.erase(I);
Insts.remove(I);
RedoInsts.remove(I);
I->eraseFromParent();
for (auto Op : Ops)
if (Instruction *OpInst = dyn_cast<Instruction>(Op))
if (OpInst->use_empty())
Insts.insert(OpInst);
}
/// Zap the given instruction, adding interesting operands to the work list.
void ReassociatePass::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
// Erase the dead instruction.
ValueRankMap.erase(I);
RedoInsts.remove(I);
I->eraseFromParent();
// Optimize its operands.
SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
// If this is a node in an expression tree, climb to the expression root
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
Visited.insert(Op).second)
Op = Op->user_back();
RedoInsts.insert(Op);
}
MadeChange = true;
}
// Canonicalize expressions of the following form:
// x + (-Constant * y) -> x - (Constant * y)
// x - (-Constant * y) -> x + (Constant * y)
Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
if (!I->hasOneUse() || I->getType()->isVectorTy())
return nullptr;
// Must be a fmul or fdiv instruction.
unsigned Opcode = I->getOpcode();
if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv)
return nullptr;
auto *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
auto *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
// Both operands are constant, let it get constant folded away.
if (C0 && C1)
return nullptr;
ConstantFP *CF = C0 ? C0 : C1;
// Must have one constant operand.
if (!CF)
return nullptr;
// Must be a negative ConstantFP.
if (!CF->isNegative())
return nullptr;
// User must be a binary operator with one or more uses.
Instruction *User = I->user_back();
if (!isa<BinaryOperator>(User) || User->use_empty())
return nullptr;
unsigned UserOpcode = User->getOpcode();
if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub)
return nullptr;
// Subtraction is not commutative. Explicitly, the following transform is
// not valid: (-Constant * y) - x -> x + (Constant * y)
if (!User->isCommutative() && User->getOperand(1) != I)
return nullptr;
+ // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the
+ // resulting subtract will be broken up later. This can get us into an
+ // infinite loop during reassociation.
+ if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User))
+ return nullptr;
+
// Change the sign of the constant.
APFloat Val = CF->getValueAPF();
Val.changeSign();
I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val));
// Canonicalize I to RHS to simplify the next bit of logic. E.g.,
// ((-Const*y) + x) -> (x + (-Const*y)).
if (User->getOperand(0) == I && User->isCommutative())
cast<BinaryOperator>(User)->swapOperands();
Value *Op0 = User->getOperand(0);
Value *Op1 = User->getOperand(1);
BinaryOperator *NI;
switch (UserOpcode) {
default:
llvm_unreachable("Unexpected Opcode!");
case Instruction::FAdd:
NI = BinaryOperator::CreateFSub(Op0, Op1);
NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
break;
case Instruction::FSub:
NI = BinaryOperator::CreateFAdd(Op0, Op1);
NI->setFastMathFlags(cast<FPMathOperator>(User)->getFastMathFlags());
break;
}
NI->insertBefore(User);
NI->setName(User->getName());
User->replaceAllUsesWith(NI);
NI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
MadeChange = true;
return NI;
}
/// Inspect and optimize the given instruction. Note that erasing
/// instructions is not allowed.
void ReassociatePass::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
if (!isa<BinaryOperator>(I))
return;
if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
(I->hasOneUse() &&
(isReassociableOp(I->user_back(), Instruction::Mul) ||
isReassociableOp(I->user_back(), Instruction::Add)))) {
Instruction *NI = ConvertShiftToMul(I);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
}
// Canonicalize negative constants out of expressions.
if (Instruction *Res = canonicalizeNegConstExpr(I))
I = Res;
// Commute binary operators, to canonicalize the order of their operands.
// This can potentially expose more CSE opportunities, and makes writing other
// transformations simpler.
if (I->isCommutative())
canonicalizeOperands(I);
// Don't optimize floating point instructions that don't have unsafe algebra.
if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
// original order of evaluation for short-circuited comparisons that
// SimplifyCFG has folded to AND/OR expressions. If the expression
// is not further optimized, it is likely to be transformed back to a
// short-circuited form for code gen, and the source order may have been
// optimized for the most likely conditions.
if (I->getType()->isIntegerTy(1))
return;
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (I->getOpcode() == Instruction::Sub) {
if (ShouldBreakUpSubtract(I)) {
Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
} else if (BinaryOperator::isNeg(I)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
// If the negate was simplified, revisit the users to see if we can
// reassociate further.
for (User *U : NI->users()) {
if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
RedoInsts.insert(Tmp);
}
RedoInsts.insert(I);
MadeChange = true;
I = NI;
}
}
} else if (I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
MadeChange = true;
I = NI;
} else if (BinaryOperator::isFNeg(I)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
// If the negate was simplified, revisit the users to see if we can
// reassociate further.
Instruction *NI = LowerNegateToMultiply(I);
for (User *U : NI->users()) {
if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
RedoInsts.insert(Tmp);
}
RedoInsts.insert(I);
MadeChange = true;
I = NI;
}
}
}
// If this instruction is an associative binary operator, process it.
if (!I->isAssociative()) return;
BinaryOperator *BO = cast<BinaryOperator>(I);
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
// During the initial run we will get to the root of the tree.
// But if we get here while we are redoing instructions, there is no
// guarantee that the root will be visited, so redo it later.
if (BO->user_back() != BO &&
BO->getParent() == BO->user_back()->getParent())
RedoInsts.insert(BO->user_back());
return;
}
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
return;
if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub)
return;
ReassociateExpression(BO);
}
void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(I, Tree);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
RepeatedValue E = Tree[i];
Ops.append(E.second.getZExtValue(),
ValueEntry(getRank(E.first), E.first));
}
DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
// Now that we have linearized the tree to a list and have gathered all of
// the operands and their ranks, sort the operands by their rank. Use a
// stable_sort so that values with equal ranks will have their relative
// positions maintained (and so the compiler is deterministic). Note that
// this sorts so that the highest ranking values end up at the beginning of
// the vector.
std::stable_sort(Ops.begin(), Ops.end());
// Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
if (Value *V = OptimizeExpression(I, Ops)) {
if (V == I)
// Self-referential expression in unreachable code.
return;
// This expression tree simplified to something that isn't a tree,
// eliminate it.
DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
I->replaceAllUsesWith(V);
if (Instruction *VI = dyn_cast<Instruction>(V))
VI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
++NumAnnihil;
return;
}
// We want to sink immediates as deeply as possible except in the case where
// this is a multiply tree used only by an add, and the immediate is a -1.
// In this case we reassociate to put the negation on the outside so that we
// can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
if (I->hasOneUse()) {
if (I->getOpcode() == Instruction::Mul &&
cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Ops.back().Op) &&
cast<ConstantInt>(Ops.back().Op)->isMinusOne()) {
ValueEntry Tmp = Ops.pop_back_val();
Ops.insert(Ops.begin(), Tmp);
} else if (I->getOpcode() == Instruction::FMul &&
cast<Instruction>(I->user_back())->getOpcode() ==
Instruction::FAdd &&
isa<ConstantFP>(Ops.back().Op) &&
cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) {
ValueEntry Tmp = Ops.pop_back_val();
Ops.insert(Ops.begin(), Tmp);
}
}
DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');
if (Ops.size() == 1) {
if (Ops[0].Op == I)
// Self-referential expression in unreachable code.
return;
// This expression tree simplified to something that isn't a tree,
// eliminate it.
I->replaceAllUsesWith(Ops[0].Op);
if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
OI->setDebugLoc(I->getDebugLoc());
RedoInsts.insert(I);
return;
}
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);
}
PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
// Get the function's basic blocks in Reverse Post Order. This order is used
// by BuildRankMap to precalculate ranks correctly. It also excludes dead
// basic blocks (the analysis in this pass has been seen to hang when
// analyzing dead basic blocks).
ReversePostOrderTraversal<Function *> RPOT(&F);
// Calculate the rank map for F.
BuildRankMap(F, RPOT);
MadeChange = false;
// Traverse the same blocks that were analyzed by BuildRankMap.
for (BasicBlock *BI : RPOT) {
assert(RankMap.count(&*BI) && "BB should be ranked.");
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
if (isInstructionTriviallyDead(&*II)) {
EraseInst(&*II++);
} else {
OptimizeInst(&*II);
assert(II->getParent() == &*BI && "Moved to a different block!");
++II;
}
// Make a copy of all the instructions to be redone so we can remove dead
// instructions.
SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
// Iterate over all instructions to be reevaluated and remove trivially dead
// instructions. If any operand of the trivially dead instruction becomes
// dead mark it for deletion as well. Continue this process until all
// trivially dead instructions have been removed.
while (!ToRedo.empty()) {
Instruction *I = ToRedo.pop_back_val();
if (isInstructionTriviallyDead(I)) {
RecursivelyEraseDeadInsts(I, ToRedo);
MadeChange = true;
}
}
// Now that we have removed dead instructions, we can reoptimize the
// remaining instructions.
while (!RedoInsts.empty()) {
Instruction *I = RedoInsts.pop_back_val();
if (isInstructionTriviallyDead(I))
EraseInst(I);
else
OptimizeInst(I);
}
}
// We are done with the rank map.
RankMap.clear();
ValueRankMap.clear();
if (MadeChange) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
return PreservedAnalyses::all();
}
namespace {
class ReassociateLegacyPass : public FunctionPass {
ReassociatePass Impl;
public:
static char ID; // Pass identification, replacement for typeid
ReassociateLegacyPass() : FunctionPass(ID) {
initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
FunctionAnalysisManager DummyFAM;
auto PA = Impl.run(F, DummyFAM);
return !PA.areAllPreserved();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
char ReassociateLegacyPass::ID = 0;
INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
"Reassociate expressions", false, false)
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() {
return new ReassociateLegacyPass();
}
Index: head/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
===================================================================
--- head/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp (revision 322854)
+++ head/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp (revision 322855)
@@ -1,833 +1,834 @@
//===- CloneFunction.cpp - Clone a function into another function ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the CloneFunctionInto interface, which is used as the
// low-level function cloner. This is used by the CloneFunction and function
// inliner to do the dirty work of copying the body of a function around.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <map>
using namespace llvm;
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo,
DebugInfoFinder *DIFinder) {
DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
Module *TheModule = F ? F->getParent() : nullptr;
// Loop over all instructions, and copy them over.
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
if (DIFinder && TheModule) {
if (auto *DDI = dyn_cast<DbgDeclareInst>(II))
DIFinder->processDeclare(*TheModule, DDI);
else if (auto *DVI = dyn_cast<DbgValueInst>(II))
DIFinder->processValue(*TheModule, DVI);
if (auto DbgLoc = II->getDebugLoc())
DIFinder->processLocation(*TheModule, DbgLoc.get());
}
Instruction *NewInst = II->clone();
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[&*II] = NewInst; // Add instruction map to value.
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
else
hasDynamicAllocas = true;
}
}
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->getEntryBlock();
}
return NewBB;
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
for (const Argument &I : OldFunc->args())
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
// Copy all attributes other than those stored in the AttributeList. We need
// to remap the parameter indices of the AttributeList.
AttributeList NewAttrs = NewFunc->getAttributes();
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
// Fix up the personality function that got copied over.
if (OldFunc->hasPersonalityFn())
NewFunc->setPersonalityFn(
MapValue(OldFunc->getPersonalityFn(), VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer));
SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
AttributeList OldAttrs = OldFunc->getAttributes();
// Clone any argument attributes that are present in the VMap.
for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
NewArgAttrs[NewArg->getArgNo()] =
OldAttrs.getParamAttributes(OldArg.getArgNo());
}
}
NewFunc->setAttributes(
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
OldAttrs.getRetAttributes(), NewArgAttrs));
bool MustCloneSP =
OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
DISubprogram *SP = OldFunc->getSubprogram();
if (SP) {
assert(!MustCloneSP || ModuleLevelChanges);
// Add mappings for some DebugInfo nodes that we don't want duplicated
// even if they're distinct.
auto &MD = VMap.MD();
MD[SP->getUnit()].reset(SP->getUnit());
MD[SP->getType()].reset(SP->getType());
MD[SP->getFile()].reset(SP->getFile());
// If we're not cloning into the same module, no need to clone the
// subprogram.
if (!MustCloneSP)
MD[SP].reset(SP);
}
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
for (auto MD : MDs) {
NewFunc->addMetadata(
MD.first,
*MapMetadata(MD.second, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer));
}
// When we remap instructions, we want to avoid duplicating inlined
// DISubprograms, so record all subprograms we find as we duplicate
// instructions and then freeze them in the MD map.
// We also record information about dbg.value and dbg.declare to avoid
// duplicating the types.
DebugInfoFinder DIFinder;
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
//
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
SP ? &DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
// want to map block addresses from the old function to block addresses in
// the clone. (This is different from the generic ValueMapper
// implementation, which generates an invalid blockaddress when
// cloning a function.)
if (BB.hasAddressTaken()) {
Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
const_cast<BasicBlock*>(&BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
}
// Note return instructions for the caller.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
for (DISubprogram *ISP : DIFinder.subprograms()) {
if (ISP != SP) {
VMap.MD()[ISP].reset(ISP);
}
}
for (auto *Type : DIFinder.types()) {
VMap.MD()[Type].reset(Type);
}
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
BE = NewFunc->end();
BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (Instruction &II : *BB)
RemapInstruction(&II, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
/// Return a copy of the specified function and add it to that function's
/// module. Also, any references specified in the VMap are changed to refer to
/// their mapped value instead of the original one. If any of the arguments to
/// the function are in the VMap, the arguments are deleted from the resultant
/// function. The VMap is updated to include mappings from all of the
/// instructions and basic blocks in the function from their old to new values.
///
Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo) {
std::vector<Type*> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we should not add those arguments to the argument type
// vector.
//
for (const Argument &I : F->args())
if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet?
ArgTypes.push_back(I.getType());
// Create a new function type...
FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
ArgTypes, F->getFunctionType()->isVarArg());
// Create the new function...
Function *NewF =
Function::Create(FTy, F->getLinkage(), F->getName(), F->getParent());
// Loop over the arguments, copying the names of the mapped arguments over...
Function::arg_iterator DestI = NewF->arg_begin();
for (const Argument & I : F->args())
if (VMap.count(&I) == 0) { // Is this argument preserved?
DestI->setName(I.getName()); // Copy the name over...
VMap[&I] = &*DestI++; // Add mapping to VMap
}
SmallVector<ReturnInst*, 8> Returns; // Ignore the cloned return instructions.
CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
CodeInfo);
return NewF;
}
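// Illustrative sketch, not part of the upstream revision: a minimal caller of
// the CloneFunction defined above. Pre-seeding VMap with a mapping for an
// argument makes the clone drop that argument and rewrite its uses, as the
// doc comment describes. specializeFirstArg and its parameters are
// hypothetical names for this example only.
static Function *specializeFirstArg(Function *F, Constant *C) {
  ValueToValueMapTy VMap;
  VMap[&*F->arg_begin()] = C; // argument is "in the VMap", so it is dropped
  return CloneFunction(F, VMap, /*CodeInfo=*/nullptr);
}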
namespace {
/// This is a private class used to implement CloneAndPruneFunctionInto.
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
ValueToValueMapTy &VMap;
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap, bool moduleLevelChanges,
const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
CodeInfo(codeInfo) {}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone);
};
}
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone){
WeakTrackingVH &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
// Nope, clone it now.
BasicBlock *NewBB;
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
// want to map block addresses from the old function to block addresses in
// the clone. (This is different from the generic ValueMapper
// implementation, which generates an invalid blockaddress when
// cloning a function.)
//
// Note that we don't need to fix the mapping for unreachable blocks;
// the default mapping there is safe.
if (BB->hasAddressTaken()) {
Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
const_cast<BasicBlock*>(BB));
VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
}
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
Instruction *NewInst = II->clone();
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
if (Value *V =
SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
- if (Value *MappedV = VMap.lookup(V))
- V = MappedV;
+ if (NewFunc != OldFunc)
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
if (!NewInst->mayHaveSideEffects()) {
VMap[&*II] = V;
NewInst->deleteValue();
continue;
}
}
}
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (CodeInfo)
if (auto CS = ImmutableCallSite(&*II))
if (CS.hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
hasStaticAllocas = true;
else
hasDynamicAllocas = true;
}
}
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
if (!Cond) {
Value *V = VMap.lookup(BI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
// Constant fold to uncond branch!
if (Cond) {
BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
}
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If we are switching on a value that is a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
if (!Cond) { // Or a known constant in the caller after mapping...
Value *V = VMap.lookup(SI->getCondition());
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
}
if (!TerminatorDone) {
Instruction *NewInst = OldTI->clone();
if (OldTI->hasName())
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
VMap[OldTI] = NewInst; // Add instruction map to value.
if (CodeInfo)
if (auto CS = ImmutableCallSite(OldTI))
if (CS.hasOperandBundles())
CodeInfo->OperandBundleCallSites.push_back(NewInst);
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
for (const BasicBlock *Succ : TI->successors())
ToClone.push_back(Succ);
}
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
}
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
ValueMaterializer *Materializer = nullptr;
#ifndef NDEBUG
// If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
if (!StartingInst)
for (const Argument &II : OldFunc->args())
assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
NameSuffix, CodeInfo);
const BasicBlock *StartingBB;
if (StartingInst)
StartingBB = StartingInst->getParent();
else {
StartingBB = &OldFunc->getEntryBlock();
StartingInst = &StartingBB->front();
}
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}
// Loop over all of the basic blocks in the old function. If the block was
// reachable, we have cloned it and the old block is now in the value map:
// insert it into the new function in the right order. If not, ignore it.
//
// Defer PHI resolution until rest of function is resolved.
SmallVector<const PHINode*, 16> PHIToResolve;
for (const BasicBlock &BI : *OldFunc) {
Value *V = VMap.lookup(&BI);
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (!NewBB) continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
// PHI nodes may have been remapped to non-PHI nodes by the caller or
// during the cloning process.
if (const PHINode *PN = dyn_cast<PHINode>(I)) {
if (isa<PHINode>(VMap[PN]))
PHIToResolve.push_back(PN);
else
break;
} else {
break;
}
}
// Finally, remap the terminator instructions, as those can't be remapped
// until all BBs are mapped.
RemapInstruction(NewBB->getTerminator(), VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer);
}
// Defer PHI resolution until the rest of the function is resolved, since PHI
// resolution requires the CFG to be up-to-date.
for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
const PHINode *OPN = PHIToResolve[phino];
unsigned NumPreds = OPN->getNumIncomingValues();
const BasicBlock *OldBB = OPN->getParent();
BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
// Map operands for blocks that are live and remove operands for blocks
// that are dead.
for (; phino != PHIToResolve.size() &&
PHIToResolve[phino]->getParent() == OldBB; ++phino) {
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
Value *V = VMap.lookup(PN->getIncomingBlock(pred));
if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
} else {
PN->removeIncomingValue(pred, false);
--pred; // Revisit the next entry.
--e;
}
}
}
// The loop above has removed PHI entries for those blocks that are dead
// and has updated others. However, if a block is live (i.e. copied over)
// but its terminator has been changed to not go to this block, then our
// phi nodes will have invalid entries. Update the PHI nodes in this
// case.
PHINode *PN = cast<PHINode>(NewBB->begin());
NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
if (NumPreds != PN->getNumIncomingValues()) {
assert(NumPreds < PN->getNumIncomingValues());
// Count how many times each predecessor comes to this block.
std::map<BasicBlock*, unsigned> PredCount;
for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
PI != E; ++PI)
--PredCount[*PI];
// Figure out how many entries to remove from each PHI.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
++PredCount[PN->getIncomingBlock(i)];
// At this point, the excess predecessor entries are positive in the
// map. Loop over all of the PHIs and remove excess predecessor
// entries.
BasicBlock::iterator I = NewBB->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
for (const auto &PCI : PredCount) {
BasicBlock *Pred = PCI.first;
for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
PN->removeIncomingValue(Pred, false);
}
}
}
// If the loops above have made these phi nodes have 0 or 1 operand,
// replace them with undef or the input value. We must do this for
// correctness, because 0-operand phis are not valid.
PN = cast<PHINode>(NewBB->begin());
if (PN->getNumIncomingValues() == 0) {
BasicBlock::iterator I = NewBB->begin();
BasicBlock::const_iterator OldI = OldBB->begin();
while ((PN = dyn_cast<PHINode>(I++))) {
Value *NV = UndefValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
assert(VMap[&*OldI] == PN && "VMap mismatch");
VMap[&*OldI] = NV;
PN->eraseFromParent();
++OldI;
}
}
}
// Make a second pass over the PHINodes now that all of them have been
// remapped into the new function, simplifying the PHINode and performing any
// recursive simplifications exposed. This will transparently update the
// WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
// two PHINodes, the iteration over the old PHIs remains valid, and the
// mapping will just map us to the new node (which may not even be a PHI
// node).
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
SmallSetVector<const Value *, 8> Worklist;
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
Worklist.insert(PHIToResolve[Idx]);
// Note that we must test the size on each iteration, the worklist can grow.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
const Value *OrigV = Worklist[Idx];
auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
if (!I)
continue;
// Skip over non-intrinsic callsites, we don't want to remove any nodes from
// the CGSCC.
CallSite CS = CallSite(I);
if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
continue;
// See if this instruction simplifies.
Value *SimpleV = SimplifyInstruction(I, DL);
if (!SimpleV)
continue;
// Stash away all the uses of the old instruction so we can check them for
// recursive simplifications after a RAUW. This is cheaper than checking all
// uses of To on the recursive step in most cases.
for (const User *U : OrigV->users())
Worklist.insert(cast<Instruction>(U));
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
// If the original instruction had no side effects, remove it.
if (isInstructionTriviallyDead(I))
I->eraseFromParent();
else
VMap[OrigV] = I;
}
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
I->getSinglePredecessor() == &*I)) {
BasicBlock *DeadBB = &*I++;
DeleteDeadBlock(DeadBB);
continue;
}
// We need to simplify conditional branches and switches with a constant
// operand. We try to prune these out when cloning, but if the
// simplification required looking through PHI nodes, those are only
// available after forming the full basic block. That may leave some here,
// and we still want to prune the dead code as early as possible.
ConstantFoldTerminator(&*I);
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor()) {
++I; continue;
}
// We shouldn't be able to get single-entry PHI nodes here, as instsimplify
// above should have zapped all of them.
assert(!isa<PHINode>(Dest->begin()));
// We know all single-entry PHI nodes in the inlined function have been
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
// Make all PHI nodes that referred to Dest now refer to I as their source.
Dest->replaceAllUsesWith(&*I);
// Move all the instructions in the succ to the pred.
I->getInstList().splice(I->end(), Dest->getInstList());
// Remove the dest block.
Dest->eraseFromParent();
// Do not increment I, iteratively merge all things this block branches to.
}
// Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
E = NewFunc->end();
I != E; ++I)
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
Returns.push_back(RI);
}
/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
/// constant arguments cause a significant amount of code in the callee to be
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
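// Illustrative sketch, not from upstream: the typical calling pattern for
// CloneAndPruneFunctionInto during inlining-style specialization. Every
// source argument must already be mapped before the call (see the NDEBUG
// check in CloneAndPruneIntoFromInst above); mapping arguments to constants
// is what lets the cloner fold branches away. cloneSpecialized is a
// hypothetical helper.
static void cloneSpecialized(Function *NewF, const Function *OldF,
                             ArrayRef<Value *> ArgValues) {
  ValueToValueMapTy VMap;
  unsigned Idx = 0;
  for (const Argument &A : OldF->args())
    VMap[&A] = ArgValues[Idx++];
  SmallVector<ReturnInst *, 8> Returns;
  CloneAndPruneFunctionInto(NewF, OldF, VMap, /*ModuleLevelChanges=*/false,
                            Returns, /*NameSuffix=*/"", /*CodeInfo=*/nullptr,
                            /*TheCall=*/nullptr);
}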
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
void llvm::remapInstructionsInBlocks(
const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
for (auto *BB : Blocks)
for (auto &Inst : *BB)
RemapInstruction(&Inst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}
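// Illustrative sketch, not from upstream: the usual pairing for the helper
// above. CloneBasicBlock fills VMap but leaves the clones' operands pointing
// at the original code; one remapInstructionsInBlocks pass then rewrites the
// clones to refer to themselves. cloneBlockSet is a hypothetical helper.
static void cloneBlockSet(ArrayRef<BasicBlock *> Orig, Function *F,
                          ValueToValueMapTy &VMap,
                          SmallVectorImpl<BasicBlock *> &Clones) {
  for (BasicBlock *BB : Orig) {
    BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone", F);
    VMap[BB] = NewBB;
    Clones.push_back(NewBB);
  }
  remapInstructionsInBlocks(Clones, VMap);
}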
/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
/// Blocks.
///
/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block
/// \p LoopDomBB. Insert the new blocks before block specified in \p Before.
Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
Loop *OrigLoop, ValueToValueMapTy &VMap,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
assert(OrigLoop->getSubLoops().empty() &&
"Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
Loop *NewLoop = new Loop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
else
LI->addTopLevelLoop(NewLoop);
BasicBlock *OrigPH = OrigLoop->getLoopPreheader();
assert(OrigPH && "No preheader");
BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F);
// To rename the loop PHIs.
VMap[OrigPH] = NewPH;
Blocks.push_back(NewPH);
// Update LoopInfo.
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(NewPH, *LI);
// Update DominatorTree.
DT->addNewBlock(NewPH, LoopDomBB);
for (BasicBlock *BB : OrigLoop->getBlocks()) {
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
VMap[BB] = NewBB;
// Update LoopInfo.
NewLoop->addBasicBlockToLoop(NewBB, *LI);
// Add DominatorTree node. After seeing all blocks, update to correct IDom.
DT->addNewBlock(NewBB, NewPH);
Blocks.push_back(NewBB);
}
for (BasicBlock *BB : OrigLoop->getBlocks()) {
// Update DominatorTree.
BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock();
DT->changeImmediateDominator(cast<BasicBlock>(VMap[BB]),
cast<BasicBlock>(VMap[IDomBB]));
}
// Move them physically from the end of the block list.
F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
NewPH);
F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(),
NewLoop->getHeader()->getIterator(), F->end());
return NewLoop;
}
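// Illustrative sketch, not from upstream: cloneLoopWithPreheader leaves the
// cloned instructions still referring to the original loop's values, so
// callers normally follow it with remapInstructionsInBlocks over the
// returned blocks. cloneAndRemapLoop is a hypothetical helper.
static Loop *cloneAndRemapLoop(BasicBlock *Before, BasicBlock *DomBB, Loop *L,
                               LoopInfo *LI, DominatorTree *DT) {
  ValueToValueMapTy VMap;
  SmallVector<BasicBlock *, 8> Blocks;
  Loop *NewLoop =
      cloneLoopWithPreheader(Before, DomBB, L, VMap, ".copy", LI, DT, Blocks);
  remapInstructionsInBlocks(Blocks, VMap); // fix clone-internal references
  return NewLoop;
}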
/// \brief Duplicate non-Phi instructions from the beginning of block up to
/// StopAt instruction into a split block between BB and its predecessor.
BasicBlock *
llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
Instruction *StopAt,
ValueToValueMapTy &ValueMapping) {
// We are going to have to map operands from the original block BB to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
BasicBlock *NewBB = SplitEdge(PredBB, BB);
NewBB->setName(PredBB->getName() + ".split");
Instruction *NewTerm = NewBB->getTerminator();
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; StopAt != &*BI; ++BI) {
Instruction *New = BI->clone();
New->setName(BI->getName());
New->insertBefore(NewTerm);
ValueMapping[&*BI] = New;
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
auto I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
}
return NewBB;
}
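// Illustrative sketch, not from upstream: a jump-threading-style use of the
// helper above, duplicating the instructions of BB that feed a condition
// into a block split off the PredBB->BB edge. hoistUpTo is a hypothetical
// helper.
static BasicBlock *hoistUpTo(BasicBlock *BB, BasicBlock *PredBB,
                             Instruction *StopAt) {
  // Afterwards, Mapping translates each duplicated instruction (and each PHI
  // of BB) to the value it has when control enters from PredBB.
  ValueToValueMapTy Mapping;
  return DuplicateInstructionsInSplitBetween(BB, PredBB, StopAt, Mapping);
}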
Index: head/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h
===================================================================
--- head/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h (revision 322854)
+++ head/contrib/llvm/tools/clang/include/clang/AST/DeclCXX.h (revision 322855)
@@ -1,3704 +1,3774 @@
//===-- DeclCXX.h - Classes for representing C++ declarations -*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines the C++ Decl subclasses, other than those for templates
/// (found in DeclTemplate.h) and friends (in DeclFriend.h).
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_DECLCXX_H
#define LLVM_CLANG_AST_DECLCXX_H
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTUnresolvedSet.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/LambdaCapture.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/Compiler.h"
namespace clang {
class ClassTemplateDecl;
class ClassTemplateSpecializationDecl;
class ConstructorUsingShadowDecl;
class CXXBasePath;
class CXXBasePaths;
class CXXConstructorDecl;
class CXXConversionDecl;
class CXXDestructorDecl;
class CXXMethodDecl;
class CXXRecordDecl;
class CXXMemberLookupCriteria;
class CXXFinalOverriderMap;
class CXXIndirectPrimaryBaseSet;
class FriendDecl;
class LambdaExpr;
class UsingDecl;
/// \brief Represents any kind of function declaration, whether it is a
/// concrete function or a function template.
class AnyFunctionDecl {
NamedDecl *Function;
AnyFunctionDecl(NamedDecl *ND) : Function(ND) { }
public:
AnyFunctionDecl(FunctionDecl *FD) : Function(FD) { }
AnyFunctionDecl(FunctionTemplateDecl *FTD);
/// \brief Implicitly converts any function or function template into a
/// named declaration.
operator NamedDecl *() const { return Function; }
/// \brief Retrieve the underlying function or function template.
NamedDecl *get() const { return Function; }
static AnyFunctionDecl getFromNamedDecl(NamedDecl *ND) {
return AnyFunctionDecl(ND);
}
};
} // end namespace clang
namespace llvm {
// Provide PointerLikeTypeTraits for non-cvr pointers.
template<>
class PointerLikeTypeTraits< ::clang::AnyFunctionDecl> {
public:
static inline void *getAsVoidPointer(::clang::AnyFunctionDecl F) {
return F.get();
}
static inline ::clang::AnyFunctionDecl getFromVoidPointer(void *P) {
return ::clang::AnyFunctionDecl::getFromNamedDecl(
static_cast< ::clang::NamedDecl*>(P));
}
enum { NumLowBitsAvailable = 2 };
};
} // end namespace llvm
namespace clang {
/// \brief Represents an access specifier followed by colon ':'.
///
/// An object of this class represents sugar for the syntactic occurrence
/// of an access specifier followed by a colon in the list of member
/// specifiers of a C++ class definition.
///
/// Note that they do not represent other uses of access specifiers,
/// such as those occurring in a list of base specifiers.
/// Also note that this class has nothing to do with so-called
/// "access declarations" (C++98 11.3 [class.access.dcl]).
class AccessSpecDecl : public Decl {
virtual void anchor();
/// \brief The location of the ':'.
SourceLocation ColonLoc;
AccessSpecDecl(AccessSpecifier AS, DeclContext *DC,
SourceLocation ASLoc, SourceLocation ColonLoc)
: Decl(AccessSpec, DC, ASLoc), ColonLoc(ColonLoc) {
setAccess(AS);
}
AccessSpecDecl(EmptyShell Empty)
: Decl(AccessSpec, Empty) { }
public:
/// \brief The location of the access specifier.
SourceLocation getAccessSpecifierLoc() const { return getLocation(); }
/// \brief Sets the location of the access specifier.
void setAccessSpecifierLoc(SourceLocation ASLoc) { setLocation(ASLoc); }
/// \brief The location of the colon following the access specifier.
SourceLocation getColonLoc() const { return ColonLoc; }
/// \brief Sets the location of the colon.
void setColonLoc(SourceLocation CLoc) { ColonLoc = CLoc; }
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(getAccessSpecifierLoc(), getColonLoc());
}
static AccessSpecDecl *Create(ASTContext &C, AccessSpecifier AS,
DeclContext *DC, SourceLocation ASLoc,
SourceLocation ColonLoc) {
return new (C, DC) AccessSpecDecl(AS, DC, ASLoc, ColonLoc);
}
static AccessSpecDecl *CreateDeserialized(ASTContext &C, unsigned ID);
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == AccessSpec; }
};
/// \brief Represents a base class of a C++ class.
///
/// Each CXXBaseSpecifier represents a single, direct base class (or
/// struct) of a C++ class (or struct). It specifies the type of that
/// base class, whether it is a virtual or non-virtual base, and what
/// level of access (public, protected, private) is used for the
/// derivation. For example:
///
/// \code
/// class A { };
/// class B { };
/// class C : public virtual A, protected B { };
/// \endcode
///
/// In this code, C will have two CXXBaseSpecifiers, one for "public
/// virtual A" and the other for "protected B".
class CXXBaseSpecifier {
/// \brief The source code range that covers the full base
/// specifier, including the "virtual" (if present) and access
/// specifier (if present).
SourceRange Range;
/// \brief The source location of the ellipsis, if this is a pack
/// expansion.
SourceLocation EllipsisLoc;
/// \brief Whether this is a virtual base class or not.
unsigned Virtual : 1;
/// \brief Whether this is the base of a class (true) or of a struct (false).
///
/// This determines the mapping from the access specifier as written in the
/// source code to the access specifier used for semantic analysis.
unsigned BaseOfClass : 1;
/// \brief Access specifier as written in the source code (may be AS_none).
///
/// The actual type of data stored here is an AccessSpecifier, but we use
/// "unsigned" here to work around a VC++ bug.
unsigned Access : 2;
/// \brief Whether the class contains a using declaration
/// to inherit the named class's constructors.
unsigned InheritConstructors : 1;
/// \brief The type of the base class.
///
/// This will be a class or struct (or a typedef of such). The source code
/// range does not include the \c virtual or the access specifier.
TypeSourceInfo *BaseTypeInfo;
public:
CXXBaseSpecifier() { }
CXXBaseSpecifier(SourceRange R, bool V, bool BC, AccessSpecifier A,
TypeSourceInfo *TInfo, SourceLocation EllipsisLoc)
: Range(R), EllipsisLoc(EllipsisLoc), Virtual(V), BaseOfClass(BC),
Access(A), InheritConstructors(false), BaseTypeInfo(TInfo) { }
/// \brief Retrieves the source range that contains the entire base specifier.
SourceRange getSourceRange() const LLVM_READONLY { return Range; }
SourceLocation getLocStart() const LLVM_READONLY { return Range.getBegin(); }
SourceLocation getLocEnd() const LLVM_READONLY { return Range.getEnd(); }
/// \brief Get the location at which the base class type was written.
SourceLocation getBaseTypeLoc() const LLVM_READONLY {
return BaseTypeInfo->getTypeLoc().getLocStart();
}
/// \brief Determines whether the base class is a virtual base class (or not).
bool isVirtual() const { return Virtual; }
/// \brief Determine whether this base class is a base of a class declared
/// with the 'class' keyword (vs. one declared with the 'struct' keyword).
bool isBaseOfClass() const { return BaseOfClass; }
/// \brief Determine whether this base specifier is a pack expansion.
bool isPackExpansion() const { return EllipsisLoc.isValid(); }
/// \brief Determine whether this base class's constructors get inherited.
bool getInheritConstructors() const { return InheritConstructors; }
/// \brief Set that this base class's constructors should be inherited.
void setInheritConstructors(bool Inherit = true) {
InheritConstructors = Inherit;
}
/// \brief For a pack expansion, determine the location of the ellipsis.
SourceLocation getEllipsisLoc() const {
return EllipsisLoc;
}
/// \brief Returns the access specifier for this base specifier.
///
/// This is the actual base specifier as used for semantic analysis, so
/// the result can never be AS_none. To retrieve the access specifier as
/// written in the source code, use getAccessSpecifierAsWritten().
AccessSpecifier getAccessSpecifier() const {
if ((AccessSpecifier)Access == AS_none)
return BaseOfClass? AS_private : AS_public;
else
return (AccessSpecifier)Access;
}
/// \brief Retrieves the access specifier as written in the source code
/// (which may mean that no access specifier was explicitly written).
///
/// Use getAccessSpecifier() to retrieve the access specifier for use in
/// semantic analysis.
AccessSpecifier getAccessSpecifierAsWritten() const {
return (AccessSpecifier)Access;
}
/// \brief Retrieves the type of the base class.
///
/// This type will always be an unqualified class type.
QualType getType() const {
return BaseTypeInfo->getType().getUnqualifiedType();
}
/// \brief Retrieves the type and source location of the base class.
TypeSourceInfo *getTypeSourceInfo() const { return BaseTypeInfo; }
};
/// \brief Represents a C++ struct/union/class.
class CXXRecordDecl : public RecordDecl {
friend void TagDecl::startDefinition();
/// Values used in DefinitionData fields to represent special members.
enum SpecialMemberFlags {
SMF_DefaultConstructor = 0x1,
SMF_CopyConstructor = 0x2,
SMF_MoveConstructor = 0x4,
SMF_CopyAssignment = 0x8,
SMF_MoveAssignment = 0x10,
SMF_Destructor = 0x20,
SMF_All = 0x3f
};
struct DefinitionData {
DefinitionData(CXXRecordDecl *D);
/// \brief True if this class has any user-declared constructors.
unsigned UserDeclaredConstructor : 1;
/// \brief The user-declared special members which this class has.
unsigned UserDeclaredSpecialMembers : 6;
/// \brief True when this class is an aggregate.
unsigned Aggregate : 1;
/// \brief True when this class is a POD-type.
unsigned PlainOldData : 1;
/// true when this class is empty for traits purposes,
/// i.e. has no data members other than 0-width bit-fields, has no
/// virtual function/base, and doesn't inherit from a non-empty
/// class. Doesn't take union-ness into account.
unsigned Empty : 1;
/// \brief True when this class is polymorphic, i.e., has at
/// least one virtual member or derives from a polymorphic class.
unsigned Polymorphic : 1;
/// \brief True when this class is abstract, i.e., has at least
/// one pure virtual function, (that can come from a base class).
unsigned Abstract : 1;
/// \brief True when this class has standard layout.
///
/// C++11 [class]p7. A standard-layout class is a class that:
/// * has no non-static data members of type non-standard-layout class (or
/// array of such types) or reference,
/// * has no virtual functions (10.3) and no virtual base classes (10.1),
/// * has the same access control (Clause 11) for all non-static data
/// members
/// * has no non-standard-layout base classes,
/// * either has no non-static data members in the most derived class and at
/// most one base class with non-static data members, or has no base
/// classes with non-static data members, and
/// * has no base classes of the same type as the first non-static data
/// member.
unsigned IsStandardLayout : 1;
/// \brief True when there are no non-empty base classes.
///
/// This is a helper bit of state used to implement IsStandardLayout more
/// efficiently.
unsigned HasNoNonEmptyBases : 1;
/// \brief True when there are private non-static data members.
unsigned HasPrivateFields : 1;
/// \brief True when there are protected non-static data members.
unsigned HasProtectedFields : 1;
/// \brief True when there are public non-static data members.
unsigned HasPublicFields : 1;
/// \brief True if this class (or any subobject) has mutable fields.
unsigned HasMutableFields : 1;
/// \brief True if this class (or any nested anonymous struct or union)
/// has variant members.
unsigned HasVariantMembers : 1;
/// \brief True if there are no non-field members declared by the user.
unsigned HasOnlyCMembers : 1;
/// \brief True if any field has an in-class initializer, including those
/// within anonymous unions or structs.
unsigned HasInClassInitializer : 1;
/// \brief True if any field is of reference type, and does not have an
/// in-class initializer.
///
/// In this case, value-initialization of this class is illegal in C++98
/// even if the class has a trivial default constructor.
unsigned HasUninitializedReferenceMember : 1;
/// \brief True if any non-mutable field whose type doesn't have a user-
/// provided default ctor also doesn't have an in-class initializer.
unsigned HasUninitializedFields : 1;
/// \brief True if there are any member using-declarations that inherit
/// constructors from a base class.
unsigned HasInheritedConstructor : 1;
/// \brief True if there are any member using-declarations named
/// 'operator='.
unsigned HasInheritedAssignment : 1;
/// \brief These flags are \c true if a defaulted corresponding special
/// member can't be fully analyzed without performing overload resolution.
/// @{
+ unsigned NeedOverloadResolutionForCopyConstructor : 1;
unsigned NeedOverloadResolutionForMoveConstructor : 1;
unsigned NeedOverloadResolutionForMoveAssignment : 1;
unsigned NeedOverloadResolutionForDestructor : 1;
/// @}
/// \brief These flags are \c true if an implicit defaulted corresponding
/// special member would be defined as deleted.
/// @{
+ unsigned DefaultedCopyConstructorIsDeleted : 1;
unsigned DefaultedMoveConstructorIsDeleted : 1;
unsigned DefaultedMoveAssignmentIsDeleted : 1;
unsigned DefaultedDestructorIsDeleted : 1;
/// @}
/// \brief The trivial special members which this class has, per
/// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25,
/// C++11 [class.dtor]p5, or would have if the member were not suppressed.
///
/// This excludes any user-declared but not user-provided special members
/// which have been declared but not yet defined.
unsigned HasTrivialSpecialMembers : 6;
/// \brief The declared special members of this class which are known to be
/// non-trivial.
///
/// This excludes any user-declared but not user-provided special members
/// which have been declared but not yet defined, and any implicit special
/// members which have not yet been declared.
unsigned DeclaredNonTrivialSpecialMembers : 6;
/// \brief True when this class has a destructor with no semantic effect.
unsigned HasIrrelevantDestructor : 1;
/// \brief True when this class has at least one user-declared constexpr
/// constructor which is neither the copy nor move constructor.
unsigned HasConstexprNonCopyMoveConstructor : 1;
/// \brief True if this class has a (possibly implicit) defaulted default
/// constructor.
unsigned HasDefaultedDefaultConstructor : 1;
+ /// \brief True if this class can be passed in a non-address-preserving
+ /// fashion (such as in registers) according to the C++ language rules.
+ /// This does not imply anything about how the ABI in use will actually
+ /// pass an object of this class.
+ unsigned CanPassInRegisters : 1;
+
/// \brief True if a defaulted default constructor for this class would
/// be constexpr.
unsigned DefaultedDefaultConstructorIsConstexpr : 1;
/// \brief True if this class has a constexpr default constructor.
///
/// This is true for either a user-declared constexpr default constructor
/// or an implicitly declared constexpr default constructor.
unsigned HasConstexprDefaultConstructor : 1;
/// \brief True when this class contains at least one non-static data
/// member or base class of non-literal or volatile type.
unsigned HasNonLiteralTypeFieldsOrBases : 1;
/// \brief True when visible conversion functions are already computed
/// and are available.
unsigned ComputedVisibleConversions : 1;
/// \brief Whether we have a C++11 user-provided default constructor (not
/// explicitly deleted or defaulted).
unsigned UserProvidedDefaultConstructor : 1;
/// \brief The special members which have been declared for this class,
/// either by the user or implicitly.
unsigned DeclaredSpecialMembers : 6;
/// \brief Whether an implicit copy constructor could have a const-qualified
/// parameter, for initializing virtual bases and for other subobjects.
unsigned ImplicitCopyConstructorCanHaveConstParamForVBase : 1;
unsigned ImplicitCopyConstructorCanHaveConstParamForNonVBase : 1;
/// \brief Whether an implicit copy assignment operator would have a
/// const-qualified parameter.
unsigned ImplicitCopyAssignmentHasConstParam : 1;
/// \brief Whether any declared copy constructor has a const-qualified
/// parameter.
unsigned HasDeclaredCopyConstructorWithConstParam : 1;
/// \brief Whether any declared copy assignment operator has either a
/// const-qualified reference parameter or a non-reference parameter.
unsigned HasDeclaredCopyAssignmentWithConstParam : 1;
/// \brief Whether this class describes a C++ lambda.
unsigned IsLambda : 1;
/// \brief Whether we are currently parsing base specifiers.
unsigned IsParsingBaseSpecifiers : 1;
unsigned HasODRHash : 1;
/// \brief A hash of parts of the class to help in ODR checking.
unsigned ODRHash;
/// \brief The number of base class specifiers in Bases.
unsigned NumBases;
/// \brief The number of virtual base class specifiers in VBases.
unsigned NumVBases;
/// \brief Base classes of this class.
///
/// FIXME: This is wasted space for a union.
LazyCXXBaseSpecifiersPtr Bases;
/// \brief The direct and indirect virtual base classes of this class.
LazyCXXBaseSpecifiersPtr VBases;
/// \brief The conversion functions of this C++ class (but not its
/// inherited conversion functions).
///
/// Each of the entries in this overload set is a CXXConversionDecl.
LazyASTUnresolvedSet Conversions;
/// \brief The conversion functions of this C++ class and all those
/// inherited conversion functions that are visible in this class.
///
/// Each of the entries in this overload set is a CXXConversionDecl or a
/// FunctionTemplateDecl.
LazyASTUnresolvedSet VisibleConversions;
/// \brief The declaration which defines this record.
CXXRecordDecl *Definition;
/// \brief The first friend declaration in this class, or null if there
/// aren't any.
///
/// This is actually currently stored in reverse order.
LazyDeclPtr FirstFriend;
/// \brief Retrieve the set of direct base classes.
CXXBaseSpecifier *getBases() const {
if (!Bases.isOffset())
return Bases.get(nullptr);
return getBasesSlowCase();
}
/// \brief Retrieve the set of virtual base classes.
CXXBaseSpecifier *getVBases() const {
if (!VBases.isOffset())
return VBases.get(nullptr);
return getVBasesSlowCase();
}
ArrayRef<CXXBaseSpecifier> bases() const {
return llvm::makeArrayRef(getBases(), NumBases);
}
ArrayRef<CXXBaseSpecifier> vbases() const {
return llvm::makeArrayRef(getVBases(), NumVBases);
}
private:
CXXBaseSpecifier *getBasesSlowCase() const;
CXXBaseSpecifier *getVBasesSlowCase() const;
};
struct DefinitionData *DefinitionData;
/// \brief Describes a C++ closure type (generated by a lambda expression).
struct LambdaDefinitionData : public DefinitionData {
typedef LambdaCapture Capture;
LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info,
bool Dependent, bool IsGeneric,
LambdaCaptureDefault CaptureDefault)
: DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric),
CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0),
ManglingNumber(0), ContextDecl(nullptr), Captures(nullptr),
MethodTyInfo(Info) {
IsLambda = true;
// C++1z [expr.prim.lambda]p4:
// This class type is not an aggregate type.
Aggregate = false;
PlainOldData = false;
}
/// \brief Whether this lambda is known to be dependent, even if its
/// context isn't dependent.
///
/// A lambda with a non-dependent context can be dependent if it occurs
/// within the default argument of a function template, because the
/// lambda will have been created with the enclosing context as its
/// declaration context, rather than the function. This is an unfortunate
/// artifact of having to parse the default arguments before the function
/// itself.
unsigned Dependent : 1;
/// \brief Whether this lambda is a generic lambda.
unsigned IsGenericLambda : 1;
/// \brief The Default Capture.
unsigned CaptureDefault : 2;
/// \brief The number of captures in this lambda is limited to 2^NumCaptures.
unsigned NumCaptures : 15;
/// \brief The number of explicit captures in this lambda.
unsigned NumExplicitCaptures : 13;
/// \brief The number used to indicate this lambda expression for name
/// mangling in the Itanium C++ ABI.
unsigned ManglingNumber;
/// \brief The declaration that provides context for this lambda, if the
/// actual DeclContext does not suffice. This is used for lambdas that
/// occur within default arguments of function parameters within the class
/// or within a data member initializer.
LazyDeclPtr ContextDecl;
/// \brief The list of captures, both explicit and implicit, for this
/// lambda.
Capture *Captures;
/// \brief The type of the call method.
TypeSourceInfo *MethodTyInfo;
};
struct DefinitionData *dataPtr() const {
// Complete the redecl chain (if necessary).
getMostRecentDecl();
return DefinitionData;
}
struct DefinitionData &data() const {
auto *DD = dataPtr();
assert(DD && "queried property of class with no definition");
return *DD;
}
struct LambdaDefinitionData &getLambdaData() const {
// No update required: a merged definition cannot change any lambda
// properties.
auto *DD = DefinitionData;
assert(DD && DD->IsLambda && "queried lambda property of non-lambda class");
return static_cast<LambdaDefinitionData&>(*DD);
}
/// \brief The template or declaration that this declaration
/// describes or was instantiated from, respectively.
///
/// For non-templates, this value will be null. For record
/// declarations that describe a class template, this will be a
/// pointer to a ClassTemplateDecl. For member
/// classes of class template specializations, this will be the
/// MemberSpecializationInfo referring to the member class that was
/// instantiated or specialized.
llvm::PointerUnion<ClassTemplateDecl*, MemberSpecializationInfo*>
TemplateOrInstantiation;
friend class DeclContext;
friend class LambdaExpr;
/// \brief Called from setBases and addedMember to notify the class that a
/// direct or virtual base class or a member of class type has been added.
void addedClassSubobject(CXXRecordDecl *Base);
/// \brief Notify the class that member has been added.
///
/// This routine helps maintain information about the class based on which
/// members have been added. It will be invoked by DeclContext::addDecl()
/// whenever a member is added to this record.
void addedMember(Decl *D);
void markedVirtualFunctionPure();
friend void FunctionDecl::setPure(bool);
friend class ASTNodeImporter;
/// \brief Get the head of our list of friend declarations, possibly
/// deserializing the friends from an external AST source.
FriendDecl *getFirstFriend() const;
protected:
CXXRecordDecl(Kind K, TagKind TK, const ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
IdentifierInfo *Id, CXXRecordDecl *PrevDecl);
public:
/// \brief Iterator that traverses the base classes of a class.
typedef CXXBaseSpecifier* base_class_iterator;
/// \brief Iterator that traverses the base classes of a class.
typedef const CXXBaseSpecifier* base_class_const_iterator;
CXXRecordDecl *getCanonicalDecl() override {
return cast<CXXRecordDecl>(RecordDecl::getCanonicalDecl());
}
const CXXRecordDecl *getCanonicalDecl() const {
return const_cast<CXXRecordDecl*>(this)->getCanonicalDecl();
}
CXXRecordDecl *getPreviousDecl() {
return cast_or_null<CXXRecordDecl>(
static_cast<RecordDecl *>(this)->getPreviousDecl());
}
const CXXRecordDecl *getPreviousDecl() const {
return const_cast<CXXRecordDecl*>(this)->getPreviousDecl();
}
CXXRecordDecl *getMostRecentDecl() {
return cast<CXXRecordDecl>(
static_cast<RecordDecl *>(this)->getMostRecentDecl());
}
const CXXRecordDecl *getMostRecentDecl() const {
return const_cast<CXXRecordDecl*>(this)->getMostRecentDecl();
}
CXXRecordDecl *getDefinition() const {
// We only need an update if we don't already know which
// declaration is the definition.
auto *DD = DefinitionData ? DefinitionData : dataPtr();
return DD ? DD->Definition : nullptr;
}
bool hasDefinition() const { return DefinitionData || dataPtr(); }
static CXXRecordDecl *Create(const ASTContext &C, TagKind TK, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
IdentifierInfo *Id,
CXXRecordDecl *PrevDecl = nullptr,
bool DelayTypeCreation = false);
static CXXRecordDecl *CreateLambda(const ASTContext &C, DeclContext *DC,
TypeSourceInfo *Info, SourceLocation Loc,
bool DependentLambda, bool IsGeneric,
LambdaCaptureDefault CaptureDefault);
static CXXRecordDecl *CreateDeserialized(const ASTContext &C, unsigned ID);
bool isDynamicClass() const {
return data().Polymorphic || data().NumVBases != 0;
}
void setIsParsingBaseSpecifiers() { data().IsParsingBaseSpecifiers = true; }
bool isParsingBaseSpecifiers() const {
return data().IsParsingBaseSpecifiers;
}
unsigned getODRHash() const;
/// \brief Sets the base classes of this struct or class.
void setBases(CXXBaseSpecifier const * const *Bases, unsigned NumBases);
/// \brief Retrieves the number of base classes of this class.
unsigned getNumBases() const { return data().NumBases; }
typedef llvm::iterator_range<base_class_iterator> base_class_range;
typedef llvm::iterator_range<base_class_const_iterator>
base_class_const_range;
base_class_range bases() {
return base_class_range(bases_begin(), bases_end());
}
base_class_const_range bases() const {
return base_class_const_range(bases_begin(), bases_end());
}
base_class_iterator bases_begin() { return data().getBases(); }
base_class_const_iterator bases_begin() const { return data().getBases(); }
base_class_iterator bases_end() { return bases_begin() + data().NumBases; }
base_class_const_iterator bases_end() const {
return bases_begin() + data().NumBases;
}
/// \brief Retrieves the number of virtual base classes of this class.
unsigned getNumVBases() const { return data().NumVBases; }
base_class_range vbases() {
return base_class_range(vbases_begin(), vbases_end());
}
base_class_const_range vbases() const {
return base_class_const_range(vbases_begin(), vbases_end());
}
base_class_iterator vbases_begin() { return data().getVBases(); }
base_class_const_iterator vbases_begin() const { return data().getVBases(); }
base_class_iterator vbases_end() { return vbases_begin() + data().NumVBases; }
base_class_const_iterator vbases_end() const {
return vbases_begin() + data().NumVBases;
}
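// Illustrative sketch, not part of the upstream header: with the ranges
// above, a base-class walk is direct. For a hypothetical defined class RD:
//   unsigned NumVirtual = 0;
//   for (const CXXBaseSpecifier &B : RD->bases())
//     if (B.isVirtual())
//       ++NumVirtual;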
/// \brief Determine whether this class has any dependent base classes which
/// are not the current instantiation.
bool hasAnyDependentBases() const;
/// Iterator access to method members. The method iterator visits
/// all method members of the class, including non-instance methods,
/// special methods, etc.
typedef specific_decl_iterator<CXXMethodDecl> method_iterator;
typedef llvm::iterator_range<specific_decl_iterator<CXXMethodDecl>>
method_range;
method_range methods() const {
return method_range(method_begin(), method_end());
}
/// \brief Method begin iterator. Iterates in the order the methods
/// were declared.
method_iterator method_begin() const {
return method_iterator(decls_begin());
}
/// \brief Method past-the-end iterator.
method_iterator method_end() const {
return method_iterator(decls_end());
}
/// Iterator access to constructor members.
typedef specific_decl_iterator<CXXConstructorDecl> ctor_iterator;
typedef llvm::iterator_range<specific_decl_iterator<CXXConstructorDecl>>
ctor_range;
ctor_range ctors() const { return ctor_range(ctor_begin(), ctor_end()); }
ctor_iterator ctor_begin() const {
return ctor_iterator(decls_begin());
}
ctor_iterator ctor_end() const {
return ctor_iterator(decls_end());
}
/// An iterator over friend declarations. All of these are defined
/// in DeclFriend.h.
class friend_iterator;
typedef llvm::iterator_range<friend_iterator> friend_range;
friend_range friends() const;
friend_iterator friend_begin() const;
friend_iterator friend_end() const;
void pushFriendDecl(FriendDecl *FD);
/// Determines whether this record has any friends.
bool hasFriends() const {
return data().FirstFriend.isValid();
}
+ /// \brief \c true if a defaulted copy constructor for this class would be
+ /// deleted.
+ bool defaultedCopyConstructorIsDeleted() const {
+ assert((!needsOverloadResolutionForCopyConstructor() ||
+ (data().DeclaredSpecialMembers & SMF_CopyConstructor)) &&
+ "this property has not yet been computed by Sema");
+ return data().DefaultedCopyConstructorIsDeleted;
+ }
+
+ /// \brief \c true if a defaulted move constructor for this class would be
+ /// deleted.
+ bool defaultedMoveConstructorIsDeleted() const {
+ assert((!needsOverloadResolutionForMoveConstructor() ||
+ (data().DeclaredSpecialMembers & SMF_MoveConstructor)) &&
+ "this property has not yet been computed by Sema");
+ return data().DefaultedMoveConstructorIsDeleted;
+ }
+
+ /// \brief \c true if a defaulted destructor for this class would be deleted.
+ bool defaultedDestructorIsDeleted() const {
+ return data().DefaultedDestructorIsDeleted;
+ }
+
/// \brief \c true if we know for sure that this class has a single,
+ /// accessible, unambiguous copy constructor that is not deleted.
+ bool hasSimpleCopyConstructor() const {
+ return !hasUserDeclaredCopyConstructor() &&
+ !data().DefaultedCopyConstructorIsDeleted;
+ }
+
+ /// \brief \c true if we know for sure that this class has a single,
/// accessible, unambiguous move constructor that is not deleted.
bool hasSimpleMoveConstructor() const {
return !hasUserDeclaredMoveConstructor() && hasMoveConstructor() &&
!data().DefaultedMoveConstructorIsDeleted;
}
+
/// \brief \c true if we know for sure that this class has a single,
/// accessible, unambiguous move assignment operator that is not deleted.
bool hasSimpleMoveAssignment() const {
return !hasUserDeclaredMoveAssignment() && hasMoveAssignment() &&
!data().DefaultedMoveAssignmentIsDeleted;
}
+
/// \brief \c true if we know for sure that this class has an accessible
/// destructor that is not deleted.
bool hasSimpleDestructor() const {
return !hasUserDeclaredDestructor() &&
!data().DefaultedDestructorIsDeleted;
}
/// \brief Determine whether this class has any default constructors.
bool hasDefaultConstructor() const {
return (data().DeclaredSpecialMembers & SMF_DefaultConstructor) ||
needsImplicitDefaultConstructor();
}
/// \brief Determine if we need to declare a default constructor for
/// this class.
///
/// This value is used for lazy creation of default constructors.
bool needsImplicitDefaultConstructor() const {
return !data().UserDeclaredConstructor &&
!(data().DeclaredSpecialMembers & SMF_DefaultConstructor) &&
// C++14 [expr.prim.lambda]p20:
// The closure type associated with a lambda-expression has no
// default constructor.
!isLambda();
}
/// \brief Determine whether this class has any user-declared constructors.
///
/// When true, a default constructor will not be implicitly declared.
bool hasUserDeclaredConstructor() const {
return data().UserDeclaredConstructor;
}
/// \brief Whether this class has a user-provided default constructor
/// per C++11.
bool hasUserProvidedDefaultConstructor() const {
return data().UserProvidedDefaultConstructor;
}
/// \brief Determine whether this class has a user-declared copy constructor.
///
/// When false, a copy constructor will be implicitly declared.
bool hasUserDeclaredCopyConstructor() const {
return data().UserDeclaredSpecialMembers & SMF_CopyConstructor;
}
/// \brief Determine whether this class needs an implicit copy
/// constructor to be lazily declared.
bool needsImplicitCopyConstructor() const {
return !(data().DeclaredSpecialMembers & SMF_CopyConstructor);
}
/// \brief Determine whether we need to eagerly declare a defaulted copy
/// constructor for this class.
bool needsOverloadResolutionForCopyConstructor() const {
- return data().HasMutableFields;
+ // C++17 [class.copy.ctor]p6:
+ // If the class definition declares a move constructor or move assignment
+ // operator, the implicitly declared copy constructor is defined as
+ // deleted.
+ // In MSVC mode, sometimes a declared move assignment does not delete an
+ // implicit copy constructor, so defer this choice to Sema.
+ if (data().UserDeclaredSpecialMembers &
+ (SMF_MoveConstructor | SMF_MoveAssignment))
+ return true;
+ return data().NeedOverloadResolutionForCopyConstructor;
}
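// Illustrative note, not part of the upstream header: given
//   struct S { S(S&&); };
// the implicitly declared copy constructor S(const S&) is defined as
// deleted under the C++17 rule quoted above, which is why a user-declared
// move member forces the overload-resolution path here.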
/// \brief Determine whether an implicit copy constructor for this type
/// would have a parameter with a const-qualified reference type.
bool implicitCopyConstructorHasConstParam() const {
return data().ImplicitCopyConstructorCanHaveConstParamForNonVBase &&
(isAbstract() ||
data().ImplicitCopyConstructorCanHaveConstParamForVBase);
}
/// \brief Determine whether this class has a copy constructor with
/// a parameter type which is a reference to a const-qualified type.
bool hasCopyConstructorWithConstParam() const {
return data().HasDeclaredCopyConstructorWithConstParam ||
(needsImplicitCopyConstructor() &&
implicitCopyConstructorHasConstParam());
}
/// \brief Whether this class has a user-declared move constructor or
/// assignment operator.
///
/// When false, a move constructor and assignment operator may be
/// implicitly declared.
bool hasUserDeclaredMoveOperation() const {
return data().UserDeclaredSpecialMembers &
(SMF_MoveConstructor | SMF_MoveAssignment);
}
/// \brief Determine whether this class has had a move constructor
/// declared by the user.
bool hasUserDeclaredMoveConstructor() const {
return data().UserDeclaredSpecialMembers & SMF_MoveConstructor;
}
/// \brief Determine whether this class has a move constructor.
bool hasMoveConstructor() const {
return (data().DeclaredSpecialMembers & SMF_MoveConstructor) ||
needsImplicitMoveConstructor();
}
- /// \brief Set that we attempted to declare an implicitly move
+ /// \brief Set that we attempted to declare an implicit copy
/// constructor, but overload resolution failed so we deleted it.
+ void setImplicitCopyConstructorIsDeleted() {
+ assert((data().DefaultedCopyConstructorIsDeleted ||
+ needsOverloadResolutionForCopyConstructor()) &&
+ "Copy constructor should not be deleted");
+ data().DefaultedCopyConstructorIsDeleted = true;
+ }
+
+ /// \brief Set that we attempted to declare an implicit move
+ /// constructor, but overload resolution failed so we deleted it.
void setImplicitMoveConstructorIsDeleted() {
assert((data().DefaultedMoveConstructorIsDeleted ||
needsOverloadResolutionForMoveConstructor()) &&
"move constructor should not be deleted");
data().DefaultedMoveConstructorIsDeleted = true;
}
/// \brief Determine whether this class should get an implicit move
/// constructor or if any existing special member function inhibits this.
bool needsImplicitMoveConstructor() const {
return !(data().DeclaredSpecialMembers & SMF_MoveConstructor) &&
!hasUserDeclaredCopyConstructor() &&
!hasUserDeclaredCopyAssignment() &&
!hasUserDeclaredMoveAssignment() &&
!hasUserDeclaredDestructor();
}
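// Illustrative example (hypothetical type): any user-declared copy
// operation, move assignment, or destructor suppresses the implicit move
// constructor, so moves fall back to the copy constructor:
//
//   struct NoImplicitMove {
//     ~NoImplicitMove() {} // user-declared destructor
//   };                     // no implicit move constructor is declared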
/// \brief Determine whether we need to eagerly declare a defaulted move
/// constructor for this class.
bool needsOverloadResolutionForMoveConstructor() const {
return data().NeedOverloadResolutionForMoveConstructor;
}
/// \brief Determine whether this class has a user-declared copy assignment
/// operator.
///
/// When false, a copy assignment operator will be implicitly declared.
bool hasUserDeclaredCopyAssignment() const {
return data().UserDeclaredSpecialMembers & SMF_CopyAssignment;
}
/// \brief Determine whether this class needs an implicit copy
/// assignment operator to be lazily declared.
bool needsImplicitCopyAssignment() const {
return !(data().DeclaredSpecialMembers & SMF_CopyAssignment);
}
/// \brief Determine whether we need to eagerly declare a defaulted copy
/// assignment operator for this class.
bool needsOverloadResolutionForCopyAssignment() const {
return data().HasMutableFields;
}
/// \brief Determine whether an implicit copy assignment operator for this
/// type would have a parameter with a const-qualified reference type.
bool implicitCopyAssignmentHasConstParam() const {
return data().ImplicitCopyAssignmentHasConstParam;
}
/// \brief Determine whether this class has a copy assignment operator with
/// a parameter type which is a reference to a const-qualified type or is not
/// a reference.
bool hasCopyAssignmentWithConstParam() const {
return data().HasDeclaredCopyAssignmentWithConstParam ||
(needsImplicitCopyAssignment() &&
implicitCopyAssignmentHasConstParam());
}
/// \brief Determine whether this class has had a move assignment
/// declared by the user.
bool hasUserDeclaredMoveAssignment() const {
return data().UserDeclaredSpecialMembers & SMF_MoveAssignment;
}
/// \brief Determine whether this class has a move assignment operator.
bool hasMoveAssignment() const {
return (data().DeclaredSpecialMembers & SMF_MoveAssignment) ||
needsImplicitMoveAssignment();
}
/// \brief Set that we attempted to declare an implicit move assignment
/// operator, but overload resolution failed so we deleted it.
void setImplicitMoveAssignmentIsDeleted() {
assert((data().DefaultedMoveAssignmentIsDeleted ||
needsOverloadResolutionForMoveAssignment()) &&
"move assignment should not be deleted");
data().DefaultedMoveAssignmentIsDeleted = true;
}
/// \brief Determine whether this class should get an implicit move
/// assignment operator or if any existing special member function inhibits
/// this.
bool needsImplicitMoveAssignment() const {
return !(data().DeclaredSpecialMembers & SMF_MoveAssignment) &&
!hasUserDeclaredCopyConstructor() &&
!hasUserDeclaredCopyAssignment() &&
!hasUserDeclaredMoveConstructor() &&
!hasUserDeclaredDestructor() &&
// C++1z [expr.prim.lambda]p21: "the closure type has a deleted copy
// assignment operator". The intent is that this counts as a user
// declared copy assignment, but we do not model it that way.
!isLambda();
}
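// Illustrative example: closure types have a deleted copy assignment
// operator, so no implicit move assignment is declared for them either:
//
//   auto L = [] {};
//   auto M = L; // OK: copy construction
//   // M = L;   // ill-formed: copy assignment deleted, no move assignment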
/// \brief Determine whether we need to eagerly declare a move assignment
/// operator for this class.
bool needsOverloadResolutionForMoveAssignment() const {
return data().NeedOverloadResolutionForMoveAssignment;
}
/// \brief Determine whether this class has a user-declared destructor.
///
/// When false, a destructor will be implicitly declared.
bool hasUserDeclaredDestructor() const {
return data().UserDeclaredSpecialMembers & SMF_Destructor;
}
/// \brief Determine whether this class needs an implicit destructor to
/// be lazily declared.
bool needsImplicitDestructor() const {
return !(data().DeclaredSpecialMembers & SMF_Destructor);
}
/// \brief Determine whether we need to eagerly declare a destructor for this
/// class.
bool needsOverloadResolutionForDestructor() const {
return data().NeedOverloadResolutionForDestructor;
}
/// \brief Determine whether this class describes a lambda function object.
bool isLambda() const {
// An update record can't turn a non-lambda into a lambda.
auto *DD = DefinitionData;
return DD && DD->IsLambda;
}
/// \brief Determine whether this class describes a generic
/// lambda function object (i.e. function call operator is
/// a template).
bool isGenericLambda() const;
/// \brief Retrieve the lambda call operator of the closure type
/// if this is a closure type.
CXXMethodDecl *getLambdaCallOperator() const;
/// \brief Retrieve the lambda static invoker, the address of which
/// is returned by the conversion operator, and the body of which
/// is forwarded to the lambda call operator.
CXXMethodDecl *getLambdaStaticInvoker() const;
/// \brief Retrieve the generic lambda's template parameter list.
/// Returns null if the class does not represent a lambda or a generic
/// lambda.
TemplateParameterList *getGenericLambdaTemplateParameterList() const;
LambdaCaptureDefault getLambdaCaptureDefault() const {
assert(isLambda());
return static_cast<LambdaCaptureDefault>(getLambdaData().CaptureDefault);
}
/// \brief For a closure type, retrieve the mapping from captured
/// variables and \c this to the non-static data members that store the
/// values or references of the captures.
///
/// \param Captures Will be populated with the mapping from captured
/// variables to the corresponding fields.
///
/// \param ThisCapture Will be set to the field declaration for the
/// \c this capture.
///
/// \note No entries will be added for init-captures, as they do not capture
/// variables.
void getCaptureFields(llvm::DenseMap<const VarDecl *, FieldDecl *> &Captures,
FieldDecl *&ThisCapture) const;
typedef const LambdaCapture *capture_const_iterator;
typedef llvm::iterator_range<capture_const_iterator> capture_const_range;
capture_const_range captures() const {
return capture_const_range(captures_begin(), captures_end());
}
capture_const_iterator captures_begin() const {
return isLambda() ? getLambdaData().Captures : nullptr;
}
capture_const_iterator captures_end() const {
return isLambda() ? captures_begin() + getLambdaData().NumCaptures
: nullptr;
}
typedef UnresolvedSetIterator conversion_iterator;
conversion_iterator conversion_begin() const {
return data().Conversions.get(getASTContext()).begin();
}
conversion_iterator conversion_end() const {
return data().Conversions.get(getASTContext()).end();
}
/// Removes a conversion function from this class. The conversion
/// function must currently be a member of this class. Furthermore,
/// this class must currently be in the process of being defined.
void removeConversion(const NamedDecl *Old);
/// \brief Get all conversion functions visible in current class,
/// including conversion function templates.
llvm::iterator_range<conversion_iterator> getVisibleConversionFunctions();
/// Determine whether this class is an aggregate (C++ [dcl.init.aggr]),
/// which is a class with no user-declared constructors, no private
/// or protected non-static data members, no base classes, and no virtual
/// functions (C++ [dcl.init.aggr]p1).
bool isAggregate() const { return data().Aggregate; }
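// Illustrative example (hypothetical types):
//
//   struct Agg { int x; double y; };       // aggregate
//   struct NotAgg { NotAgg(int); int x; }; // user-declared constructor:
//                                          // not an aggregate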
/// \brief Whether this class has any in-class initializers
/// for non-static data members (including those in anonymous unions or
/// structs).
bool hasInClassInitializer() const { return data().HasInClassInitializer; }
/// \brief Whether this class or any of its subobjects has any members of
/// reference type which would make value-initialization ill-formed.
///
/// Per C++03 [dcl.init]p5:
/// - if T is a non-union class type without a user-declared constructor,
/// then every non-static data member and base-class component of T is
/// value-initialized [...] A program that calls for [...]
/// value-initialization of an entity of reference type is ill-formed.
bool hasUninitializedReferenceMember() const {
return !isUnion() && !hasUserDeclaredConstructor() &&
data().HasUninitializedReferenceMember;
}
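// Illustrative example (hypothetical type): value-initializing a class with
// an unbound reference member is ill-formed, as described above:
//
//   struct R { int &r; };
//   // R x = R(); // ill-formed: cannot value-initialize the reference 'r'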
/// \brief Whether this class is a POD-type (C++ [class]p4)
///
/// For purposes of this function a class is POD if it is an aggregate
/// that has no non-static non-POD data members, no reference data
/// members, no user-defined copy assignment operator and no
/// user-defined destructor.
///
/// Note that this is the C++ TR1 definition of POD.
bool isPOD() const { return data().PlainOldData; }
/// \brief True if this class is C-like, without C++-specific features, e.g.
/// it contains only public fields, no bases, tag kind is not 'class', etc.
bool isCLike() const;
/// \brief Determine whether this is an empty class in the sense of
/// (C++11 [meta.unary.prop]).
///
/// The CXXRecordDecl is a class type, but not a union type,
/// with no non-static data members other than bit-fields of length 0,
/// no virtual member functions, no virtual base classes,
/// and no base class B for which is_empty<B>::value is false.
///
/// \note This does NOT include a check for union-ness.
bool isEmpty() const { return data().Empty; }
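// Illustrative example (hypothetical types):
//
//   struct E {};                 // empty
//   struct E2 : E { void f(); }; // still empty: no data, no virtuals
//   struct NE { int x; };        // not empty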
/// \brief Determine whether this class has direct non-static data members.
bool hasDirectFields() const {
auto &D = data();
return D.HasPublicFields || D.HasProtectedFields || D.HasPrivateFields;
}
/// Whether this class is polymorphic (C++ [class.virtual]),
/// which means that the class contains or inherits a virtual function.
bool isPolymorphic() const { return data().Polymorphic; }
/// \brief Determine whether this class has a pure virtual function.
///
/// The class is abstract per C++ [class.abstract]p2 if it declares
/// a pure virtual function or inherits a pure virtual function that is
/// not overridden.
bool isAbstract() const { return data().Abstract; }
/// \brief Determine whether this class has standard layout per
/// (C++ [class]p7)
bool isStandardLayout() const { return data().IsStandardLayout; }
/// \brief Determine whether this class, or any of its class subobjects,
/// contains a mutable field.
bool hasMutableFields() const { return data().HasMutableFields; }
/// \brief Determine whether this class has any variant members.
bool hasVariantMembers() const { return data().HasVariantMembers; }
/// \brief Determine whether this class has a trivial default constructor
/// (C++11 [class.ctor]p5).
bool hasTrivialDefaultConstructor() const {
return hasDefaultConstructor() &&
(data().HasTrivialSpecialMembers & SMF_DefaultConstructor);
}
/// \brief Determine whether this class has a non-trivial default constructor
/// (C++11 [class.ctor]p5).
bool hasNonTrivialDefaultConstructor() const {
return (data().DeclaredNonTrivialSpecialMembers & SMF_DefaultConstructor) ||
(needsImplicitDefaultConstructor() &&
!(data().HasTrivialSpecialMembers & SMF_DefaultConstructor));
}
/// \brief Determine whether this class has at least one constexpr constructor
/// other than the copy or move constructors.
bool hasConstexprNonCopyMoveConstructor() const {
return data().HasConstexprNonCopyMoveConstructor ||
(needsImplicitDefaultConstructor() &&
defaultedDefaultConstructorIsConstexpr());
}
/// \brief Determine whether a defaulted default constructor for this class
/// would be constexpr.
bool defaultedDefaultConstructorIsConstexpr() const {
return data().DefaultedDefaultConstructorIsConstexpr &&
(!isUnion() || hasInClassInitializer() || !hasVariantMembers());
}
/// \brief Determine whether this class has a constexpr default constructor.
bool hasConstexprDefaultConstructor() const {
return data().HasConstexprDefaultConstructor ||
(needsImplicitDefaultConstructor() &&
defaultedDefaultConstructorIsConstexpr());
}
/// \brief Determine whether this class has a trivial copy constructor
/// (C++ [class.copy]p6, C++11 [class.copy]p12)
bool hasTrivialCopyConstructor() const {
return data().HasTrivialSpecialMembers & SMF_CopyConstructor;
}
/// \brief Determine whether this class has a non-trivial copy constructor
/// (C++ [class.copy]p6, C++11 [class.copy]p12)
bool hasNonTrivialCopyConstructor() const {
return data().DeclaredNonTrivialSpecialMembers & SMF_CopyConstructor ||
!hasTrivialCopyConstructor();
}
/// \brief Determine whether this class has a trivial move constructor
/// (C++11 [class.copy]p12)
bool hasTrivialMoveConstructor() const {
return hasMoveConstructor() &&
(data().HasTrivialSpecialMembers & SMF_MoveConstructor);
}
/// \brief Determine whether this class has a non-trivial move constructor
/// (C++11 [class.copy]p12)
bool hasNonTrivialMoveConstructor() const {
return (data().DeclaredNonTrivialSpecialMembers & SMF_MoveConstructor) ||
(needsImplicitMoveConstructor() &&
!(data().HasTrivialSpecialMembers & SMF_MoveConstructor));
}
/// \brief Determine whether this class has a trivial copy assignment operator
/// (C++ [class.copy]p11, C++11 [class.copy]p25)
bool hasTrivialCopyAssignment() const {
return data().HasTrivialSpecialMembers & SMF_CopyAssignment;
}
/// \brief Determine whether this class has a non-trivial copy assignment
/// operator (C++ [class.copy]p11, C++11 [class.copy]p25)
bool hasNonTrivialCopyAssignment() const {
return data().DeclaredNonTrivialSpecialMembers & SMF_CopyAssignment ||
!hasTrivialCopyAssignment();
}
/// \brief Determine whether this class has a trivial move assignment operator
/// (C++11 [class.copy]p25)
bool hasTrivialMoveAssignment() const {
return hasMoveAssignment() &&
(data().HasTrivialSpecialMembers & SMF_MoveAssignment);
}
/// \brief Determine whether this class has a non-trivial move assignment
/// operator (C++11 [class.copy]p25)
bool hasNonTrivialMoveAssignment() const {
return (data().DeclaredNonTrivialSpecialMembers & SMF_MoveAssignment) ||
(needsImplicitMoveAssignment() &&
!(data().HasTrivialSpecialMembers & SMF_MoveAssignment));
}
/// \brief Determine whether this class has a trivial destructor
/// (C++ [class.dtor]p3)
bool hasTrivialDestructor() const {
return data().HasTrivialSpecialMembers & SMF_Destructor;
}
/// \brief Determine whether this class has a non-trivial destructor
/// (C++ [class.dtor]p3)
bool hasNonTrivialDestructor() const {
return !(data().HasTrivialSpecialMembers & SMF_Destructor);
}
/// \brief Determine whether declaring a const variable with this type is ok
/// per core issue 253.
bool allowConstDefaultInit() const {
return !data().HasUninitializedFields ||
!(data().HasDefaultedDefaultConstructor ||
needsImplicitDefaultConstructor());
}
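// Illustrative example (hypothetical types) of the core issue 253 rule:
//
//   struct A { int n = 0; };
//   const A a; // OK: no field is left uninitialized
//   struct B { int n; };
//   // const B b; // ill-formed: 'n' would be left uninitialized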
/// \brief Determine whether this class has a destructor which has no
/// semantic effect.
///
/// Any such destructor will be trivial, public, defaulted and not deleted,
/// and will call only irrelevant destructors.
bool hasIrrelevantDestructor() const {
return data().HasIrrelevantDestructor;
+ }
+
+ /// \brief Determine whether this class has at least one trivial, non-deleted
+ /// copy or move constructor.
+ bool canPassInRegisters() const {
+ return data().CanPassInRegisters;
+ }
+
+ /// \brief Set that we can pass this RecordDecl in registers.
+ // FIXME: This should be set as part of completeDefinition.
+ void setCanPassInRegisters(bool CanPass) {
+ data().CanPassInRegisters = CanPass;
}
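// Illustrative example (hypothetical types): a type with a trivial copy
// constructor can typically be passed in registers, while a non-trivial
// copy constructor usually forces an indirect (in-memory) pass, depending
// on the target ABI:
//
//   struct Small { int x, y; };                   // may pass in registers
//   struct Tracked { Tracked(const Tracked &); }; // passed indirectly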
/// \brief Determine whether this class has a non-literal or volatile type
/// non-static data member or base class.
bool hasNonLiteralTypeFieldsOrBases() const {
return data().HasNonLiteralTypeFieldsOrBases;
}
/// \brief Determine whether this class has a using-declaration that names
/// a user-declared base class constructor.
bool hasInheritedConstructor() const {
return data().HasInheritedConstructor;
}
/// \brief Determine whether this class has a using-declaration that names
/// a base class assignment operator.
bool hasInheritedAssignment() const {
return data().HasInheritedAssignment;
}
/// \brief Determine whether this class is considered trivially copyable per
/// (C++11 [class]p6).
bool isTriviallyCopyable() const;
/// \brief Determine whether this class is considered trivial.
///
/// C++11 [class]p6:
/// "A trivial class is a class that has a trivial default constructor and
/// is trivially copyable."
bool isTrivial() const {
return isTriviallyCopyable() && hasTrivialDefaultConstructor();
}
/// \brief Determine whether this class is a literal type.
///
/// C++11 [basic.types]p10:
/// A class type that has all the following properties:
/// - it has a trivial destructor
/// - every constructor call and full-expression in the
/// brace-or-equal-initializers for non-static data members (if any) is
/// a constant expression.
/// - it is an aggregate type or has at least one constexpr constructor
/// or constructor template that is not a copy or move constructor, and
/// - all of its non-static data members and base classes are of literal
/// types
///
/// We resolve DR1361 by ignoring the second bullet. We resolve DR1452 by
/// treating types with trivial default constructors as literal types.
///
/// Only in C++1z and beyond are lambdas literal types.
bool isLiteral() const {
return hasTrivialDestructor() &&
(!isLambda() || getASTContext().getLangOpts().CPlusPlus1z) &&
!hasNonLiteralTypeFieldsOrBases() &&
(isAggregate() || isLambda() ||
hasConstexprNonCopyMoveConstructor() ||
hasTrivialDefaultConstructor());
}
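// Illustrative example (hypothetical types):
//
//   struct Lit { int x; constexpr Lit(int v) : x(v) {} }; // literal type
//   struct NonLit { ~NonLit() {} }; // non-trivial destructor: not literal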
/// \brief If this record is an instantiation of a member class,
/// retrieves the member class from which it was instantiated.
///
/// This routine will return non-null for (non-templated) member
/// classes of class templates. For example, given:
///
/// \code
/// template<typename T>
/// struct X {
/// struct A { };
/// };
/// \endcode
///
/// The declaration for X<int>::A is a (non-templated) CXXRecordDecl
/// whose parent is the class template specialization X<int>. For
/// this declaration, getInstantiatedFromMemberClass() will return
/// the CXXRecordDecl X<T>::A. When a complete definition of
/// X<int>::A is required, it will be instantiated from the
/// declaration returned by getInstantiatedFromMemberClass().
CXXRecordDecl *getInstantiatedFromMemberClass() const;
/// \brief If this class is an instantiation of a member class of a
/// class template specialization, retrieves the member specialization
/// information.
MemberSpecializationInfo *getMemberSpecializationInfo() const;
/// \brief Specify that this record is an instantiation of the
/// member class \p RD.
void setInstantiationOfMemberClass(CXXRecordDecl *RD,
TemplateSpecializationKind TSK);
/// \brief Retrieves the class template that is described by this
/// class declaration.
///
/// Every class template is represented as a ClassTemplateDecl and a
/// CXXRecordDecl. The former contains template properties (such as
/// the template parameter lists) while the latter contains the
/// actual description of the template's
/// contents. ClassTemplateDecl::getTemplatedDecl() retrieves the
/// CXXRecordDecl from a ClassTemplateDecl, while
/// getDescribedClassTemplate() retrieves the ClassTemplateDecl from
/// a CXXRecordDecl.
ClassTemplateDecl *getDescribedClassTemplate() const;
void setDescribedClassTemplate(ClassTemplateDecl *Template);
/// \brief Determine whether this particular class is a specialization or
/// instantiation of a class template or member class of a class template,
/// and how it was instantiated or specialized.
TemplateSpecializationKind getTemplateSpecializationKind() const;
/// \brief Set the kind of specialization or template instantiation this is.
void setTemplateSpecializationKind(TemplateSpecializationKind TSK);
/// \brief Retrieve the record declaration from which this record could be
/// instantiated. Returns null if this class is not a template instantiation.
const CXXRecordDecl *getTemplateInstantiationPattern() const;
CXXRecordDecl *getTemplateInstantiationPattern() {
return const_cast<CXXRecordDecl *>(const_cast<const CXXRecordDecl *>(this)
->getTemplateInstantiationPattern());
}
/// \brief Returns the destructor decl for this class.
CXXDestructorDecl *getDestructor() const;
/// \brief Returns true if the class destructor, or any implicitly invoked
/// destructor, is marked noreturn.
bool isAnyDestructorNoReturn() const;
/// \brief If the class is a local class [class.local], returns
/// the enclosing function declaration.
const FunctionDecl *isLocalClass() const {
if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(getDeclContext()))
return RD->isLocalClass();
return dyn_cast<FunctionDecl>(getDeclContext());
}
FunctionDecl *isLocalClass() {
return const_cast<FunctionDecl*>(
const_cast<const CXXRecordDecl*>(this)->isLocalClass());
}
/// \brief Determine whether this dependent class is a current instantiation,
/// when viewed from within the given context.
bool isCurrentInstantiation(const DeclContext *CurContext) const;
/// \brief Determine whether this class is derived from the class \p Base.
///
/// This routine only determines whether this class is derived from \p Base,
/// but does not account for factors that may make a Derived -> Base class
/// ill-formed, such as private/protected inheritance or multiple, ambiguous
/// base class subobjects.
///
/// \param Base the base class we are searching for.
///
/// \returns true if this class is derived from Base, false otherwise.
bool isDerivedFrom(const CXXRecordDecl *Base) const;
/// \brief Determine whether this class is derived from the type \p Base.
///
/// This routine only determines whether this class is derived from \p Base,
/// but does not account for factors that may make a Derived -> Base class
/// ill-formed, such as private/protected inheritance or multiple, ambiguous
/// base class subobjects.
///
/// \param Base the base class we are searching for.
///
/// \param Paths will contain the paths taken from the current class to the
/// given \p Base class.
///
/// \returns true if this class is derived from \p Base, false otherwise.
///
/// \todo add a separate parameter to configure IsDerivedFrom, rather than
/// tangling input and output in \p Paths
bool isDerivedFrom(const CXXRecordDecl *Base, CXXBasePaths &Paths) const;
/// \brief Determine whether this class is virtually derived from
/// the class \p Base.
///
/// This routine only determines whether this class is virtually
/// derived from \p Base, but does not account for factors that may
/// make a Derived -> Base class ill-formed, such as
/// private/protected inheritance or multiple, ambiguous base class
/// subobjects.
///
/// \param Base the base class we are searching for.
///
/// \returns true if this class is virtually derived from Base,
/// false otherwise.
bool isVirtuallyDerivedFrom(const CXXRecordDecl *Base) const;
/// \brief Determine whether this class is provably not derived from
/// the type \p Base.
bool isProvablyNotDerivedFrom(const CXXRecordDecl *Base) const;
/// \brief Function type used by forallBases() as a callback.
///
/// \param BaseDefinition the definition of the base class
///
/// \returns true if this base matched the search criteria
typedef llvm::function_ref<bool(const CXXRecordDecl *BaseDefinition)>
ForallBasesCallback;
/// \brief Determines if the given callback holds for all the direct
/// or indirect base classes of this type.
///
/// The class itself does not count as a base class. This routine
/// returns false if the class has non-computable base classes.
///
/// \param BaseMatches Callback invoked for each (direct or indirect) base
/// class of this type, or if \p AllowShortCircuit is true then until a call
/// returns false.
///
/// \param AllowShortCircuit if false, forces the callback to be called
/// for every base class, even if a dependent or non-matching base was
/// found.
bool forallBases(ForallBasesCallback BaseMatches,
bool AllowShortCircuit = true) const;
/// \brief Function type used by lookupInBases() to determine whether a
/// specific base class subobject matches the lookup criteria.
///
/// \param Specifier the base-class specifier that describes the inheritance
/// from the base class we are trying to match.
///
/// \param Path the current path, from the most-derived class down to the
/// base named by the \p Specifier.
///
/// \returns true if this base matched the search criteria, false otherwise.
typedef llvm::function_ref<bool(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path)> BaseMatchesCallback;
/// \brief Look for entities within the base classes of this C++ class,
/// transitively searching all base class subobjects.
///
/// This routine uses the callback function \p BaseMatches to find base
/// classes meeting some search criteria, walking all base class subobjects
/// and populating the given \p Paths structure with the paths through the
/// inheritance hierarchy that resulted in a match. On a successful search,
/// the \p Paths structure can be queried to retrieve the matching paths and
/// to determine if there were any ambiguities.
///
/// \param BaseMatches callback function used to determine whether a given
/// base matches the user-defined search criteria.
///
/// \param Paths used to record the paths from this class to its base class
/// subobjects that match the search criteria.
///
/// \param LookupInDependent can be set to true to extend the search to
/// dependent base classes.
///
/// \returns true if there exists any path from this class to a base class
/// subobject that matches the search criteria.
bool lookupInBases(BaseMatchesCallback BaseMatches, CXXBasePaths &Paths,
bool LookupInDependent = false) const;
/// \brief Base-class lookup callback that determines whether the given
/// base class specifier refers to a specific class declaration.
///
/// This callback can be used with \c lookupInBases() to determine whether
/// a given derived class has a base class subobject of a particular type.
/// The base record pointer should refer to the canonical CXXRecordDecl of the
/// base class that we are searching for.
static bool FindBaseClass(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, const CXXRecordDecl *BaseRecord);
/// \brief Base-class lookup callback that determines whether the
/// given base class specifier refers to a specific class
/// declaration and describes virtual derivation.
///
/// This callback can be used with \c lookupInBases() to determine
/// whether a given derived class has a virtual base class
/// subobject of a particular type. The base record pointer should
/// refer to the canonical CXXRecordDecl of the base class that we
/// are searching for.
static bool FindVirtualBaseClass(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path,
const CXXRecordDecl *BaseRecord);
/// \brief Base-class lookup callback that determines whether there exists
/// a tag with the given name.
///
/// This callback can be used with \c lookupInBases() to find tag members
/// of the given name within a C++ class hierarchy.
static bool FindTagMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// a member with the given name.
///
/// This callback can be used with \c lookupInBases() to find members
/// of the given name within a C++ class hierarchy.
static bool FindOrdinaryMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// a member with the given name.
///
/// This callback can be used with \c lookupInBases() to find members
/// of the given name within a C++ class hierarchy, including dependent
/// classes.
static bool
FindOrdinaryMemberInDependentClasses(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// an OpenMP declare reduction member with the given name.
///
/// This callback can be used with \c lookupInBases() to find members
/// of the given name within a C++ class hierarchy.
static bool FindOMPReductionMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path, DeclarationName Name);
/// \brief Base-class lookup callback that determines whether there exists
/// a member with the given name that can be used in a nested-name-specifier.
///
/// This callback can be used with \c lookupInBases() to find members of
/// the given name within a C++ class hierarchy that can occur within
/// nested-name-specifiers.
static bool FindNestedNameSpecifierMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path,
DeclarationName Name);
/// \brief Retrieve the final overriders for each virtual member
/// function in the class hierarchy where this class is the
/// most-derived class in the class hierarchy.
void getFinalOverriders(CXXFinalOverriderMap &FinalOverriders) const;
/// \brief Get the indirect primary bases for this class.
void getIndirectPrimaryBases(CXXIndirectPrimaryBaseSet& Bases) const;
/// Performs an imprecise lookup of a dependent name in this class.
///
/// This function does not follow strict semantic rules and should be used
/// only when lookup rules can be relaxed, e.g. indexing.
std::vector<const NamedDecl *>
lookupDependentName(const DeclarationName &Name,
llvm::function_ref<bool(const NamedDecl *ND)> Filter);
/// Renders and displays an inheritance diagram
/// for this C++ class and all of its base classes (transitively) using
/// GraphViz.
void viewInheritance(ASTContext& Context) const;
/// \brief Calculates the access of a decl that is reached
/// along a path.
static AccessSpecifier MergeAccess(AccessSpecifier PathAccess,
AccessSpecifier DeclAccess) {
assert(DeclAccess != AS_none);
if (DeclAccess == AS_private) return AS_none;
return (PathAccess > DeclAccess ? PathAccess : DeclAccess);
}
/// \brief Indicates that the declaration of a defaulted or deleted special
/// member function is now complete.
void finishedDefaultedOrDeletedMember(CXXMethodDecl *MD);
/// \brief Indicates that the definition of this class is now complete.
void completeDefinition() override;
/// \brief Indicates that the definition of this class is now complete,
/// and provides a final overrider map to help determine whether the class
/// is abstract.
///
/// \param FinalOverriders The final overrider map for this class, which can
/// be provided as an optimization for abstract-class checking. If NULL,
/// final overriders will be computed if they are needed to complete the
/// definition.
void completeDefinition(CXXFinalOverriderMap *FinalOverriders);
/// \brief Determine whether this class may end up being abstract, even though
/// it is not yet known to be abstract.
///
/// \returns true if this class is not known to be abstract but has any
/// base classes that are abstract. In this case, \c completeDefinition()
/// will need to compute final overriders to determine whether the class is
/// actually abstract.
bool mayBeAbstract() const;
/// \brief If this is the closure type of a lambda expression, retrieve the
/// number to be used for name mangling in the Itanium C++ ABI.
///
/// Zero indicates that this closure type has internal linkage, so the
/// mangling number does not matter, while a non-zero value indicates which
/// lambda expression this is in this particular context.
unsigned getLambdaManglingNumber() const {
assert(isLambda() && "Not a lambda closure type!");
return getLambdaData().ManglingNumber;
}
/// \brief Retrieve the declaration that provides additional context for a
/// lambda, when the normal declaration context is not specific enough.
///
/// Certain contexts (default arguments of in-class function parameters and
/// the initializers of data members) have separate name mangling rules for
/// lambdas within the Itanium C++ ABI. For these cases, this routine provides
/// the declaration in which the lambda occurs, e.g., the function parameter
/// or the non-static data member. Otherwise, it returns NULL to imply that
/// the declaration context suffices.
Decl *getLambdaContextDecl() const;
/// \brief Set the mangling number and context declaration for a lambda
/// class.
void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl) {
getLambdaData().ManglingNumber = ManglingNumber;
getLambdaData().ContextDecl = ContextDecl;
}
/// \brief Returns the inheritance model used for this record.
MSInheritanceAttr::Spelling getMSInheritanceModel() const;
/// \brief Calculate what the inheritance model would be for this class.
MSInheritanceAttr::Spelling calculateInheritanceModel() const;
/// In the Microsoft C++ ABI, use zero for the field offset of a null data
/// member pointer if we can guarantee that zero is not a valid field offset,
/// or if the member pointer has multiple fields. Polymorphic classes have a
/// vfptr at offset zero, so we can use zero for null. If there are multiple
/// fields, we can use zero even if it is a valid field offset because
/// null-ness testing will check the other fields.
bool nullFieldOffsetIsZero() const {
return !MSInheritanceAttr::hasOnlyOneField(/*IsMemberFunction=*/false,
getMSInheritanceModel()) ||
(hasDefinition() && isPolymorphic());
}
/// \brief Controls when vtordisps will be emitted if this record is used as a
/// virtual base.
MSVtorDispAttr::Mode getMSVtorDispMode() const;
/// \brief Determine whether this lambda expression was known to be dependent
/// at the time it was created, even if its context does not appear to be
/// dependent.
///
/// This flag is a workaround for an issue with parsing, where default
/// arguments are parsed before their enclosing function declarations have
/// been created. This means that any lambda expressions within those
/// default arguments will have as their DeclContext the context enclosing
/// the function declaration, which may be non-dependent even when the
/// function declaration itself is dependent. This flag indicates when we
/// know that the lambda is dependent despite that.
bool isDependentLambda() const {
return isLambda() && getLambdaData().Dependent;
}
TypeSourceInfo *getLambdaTypeInfo() const {
return getLambdaData().MethodTyInfo;
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstCXXRecord && K <= lastCXXRecord;
}
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend class ASTRecordWriter;
friend class ASTReader;
friend class ASTWriter;
};
/// \brief Represents a C++ deduction guide declaration.
///
/// \code
/// template<typename T> struct A { A(); A(T); };
/// A() -> A<int>;
/// \endcode
///
/// In this example, there will be an explicit deduction guide from the
/// second line, and implicit deduction guide templates synthesized from
/// the constructors of \c A.
class CXXDeductionGuideDecl : public FunctionDecl {
void anchor() override;
private:
CXXDeductionGuideDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
bool IsExplicit, const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
SourceLocation EndLocation)
: FunctionDecl(CXXDeductionGuide, C, DC, StartLoc, NameInfo, T, TInfo,
SC_None, false, false) {
if (EndLocation.isValid())
setRangeEnd(EndLocation);
IsExplicitSpecified = IsExplicit;
}
public:
static CXXDeductionGuideDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, bool IsExplicit,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
SourceLocation EndLocation);
static CXXDeductionGuideDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Whether this deduction guide is explicit.
bool isExplicit() const { return IsExplicitSpecified; }
/// Whether this deduction guide was declared with the 'explicit' specifier.
bool isExplicitSpecified() const { return IsExplicitSpecified; }
/// Get the template for which this guide performs deduction.
TemplateDecl *getDeducedTemplate() const {
return getDeclName().getCXXDeductionGuideTemplate();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXDeductionGuide; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
/// \brief Represents a static or instance method of a struct/union/class.
///
/// In the terminology of the C++ Standard, these are the (static and
/// non-static) member functions, whether virtual or not.
class CXXMethodDecl : public FunctionDecl {
void anchor() override;
protected:
CXXMethodDecl(Kind DK, ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc, const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC, bool isInline,
bool isConstexpr, SourceLocation EndLocation)
: FunctionDecl(DK, C, RD, StartLoc, NameInfo, T, TInfo,
SC, isInline, isConstexpr) {
if (EndLocation.isValid())
setRangeEnd(EndLocation);
}
public:
static CXXMethodDecl *Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC,
bool isInline,
bool isConstexpr,
SourceLocation EndLocation);
static CXXMethodDecl *CreateDeserialized(ASTContext &C, unsigned ID);
bool isStatic() const;
bool isInstance() const { return !isStatic(); }
/// Returns true if the given operator is implicitly static in a record
/// context.
static bool isStaticOverloadedOperator(OverloadedOperatorKind OOK) {
// [class.free]p1:
// Any allocation function for a class T is a static member
// (even if not explicitly declared static).
// [class.free]p6 Any deallocation function for a class X is a static member
// (even if not explicitly declared static).
return OOK == OO_New || OOK == OO_Array_New || OOK == OO_Delete ||
OOK == OO_Array_Delete;
}
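// Illustrative example (hypothetical type): class-scope allocation and
// deallocation functions are static members even without the keyword:
//
//   struct S {
//     void *operator new(std::size_t); // implicitly static
//     void operator delete(void *);    // implicitly static
//   };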
bool isConst() const { return getType()->castAs<FunctionType>()->isConst(); }
bool isVolatile() const { return getType()->castAs<FunctionType>()->isVolatile(); }
bool isVirtual() const {
CXXMethodDecl *CD =
cast<CXXMethodDecl>(const_cast<CXXMethodDecl*>(this)->getCanonicalDecl());
// Member function is virtual if it is marked explicitly so, or if it is
// declared in __interface -- then it is automatically pure virtual.
if (CD->isVirtualAsWritten() || CD->isPure())
return true;
return (CD->begin_overridden_methods() != CD->end_overridden_methods());
}
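// Illustrative example (hypothetical types): a member function is virtual
// if it overrides one, even without the 'virtual' keyword:
//
//   struct Base { virtual void f(); };
//   struct Derived : Base { void f(); }; // virtual: overrides Base::f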
/// If it's possible to devirtualize a call to this method, return the called
/// function. Otherwise, return null.
/// \param Base The object on which this virtual function is called.
/// \param IsAppleKext True if we are compiling for Apple kext.
CXXMethodDecl *getDevirtualizedMethod(const Expr *Base, bool IsAppleKext);
const CXXMethodDecl *getDevirtualizedMethod(const Expr *Base,
bool IsAppleKext) const {
return const_cast<CXXMethodDecl *>(this)->getDevirtualizedMethod(
Base, IsAppleKext);
}
/// \brief Determine whether this is a usual deallocation function
/// (C++ [basic.stc.dynamic.deallocation]p2), which is an overloaded
/// delete or delete[] operator with a particular signature.
bool isUsualDeallocationFunction() const;
/// \brief Determine whether this is a copy-assignment operator, regardless
/// of whether it was declared implicitly or explicitly.
bool isCopyAssignmentOperator() const;
/// \brief Determine whether this is a move assignment operator.
bool isMoveAssignmentOperator() const;
CXXMethodDecl *getCanonicalDecl() override {
return cast<CXXMethodDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXMethodDecl *getCanonicalDecl() const {
return const_cast<CXXMethodDecl*>(this)->getCanonicalDecl();
}
CXXMethodDecl *getMostRecentDecl() {
return cast<CXXMethodDecl>(
static_cast<FunctionDecl *>(this)->getMostRecentDecl());
}
const CXXMethodDecl *getMostRecentDecl() const {
return const_cast<CXXMethodDecl*>(this)->getMostRecentDecl();
}
/// True if this method is user-declared and was not
/// deleted or defaulted on its first declaration.
bool isUserProvided() const {
return !(isDeleted() || getCanonicalDecl()->isDefaulted());
}
/// \brief Record that this method overrides the given method \p MD.
void addOverriddenMethod(const CXXMethodDecl *MD);
typedef const CXXMethodDecl *const* method_iterator;
method_iterator begin_overridden_methods() const;
method_iterator end_overridden_methods() const;
unsigned size_overridden_methods() const;
typedef ASTContext::overridden_method_range overridden_method_range;
overridden_method_range overridden_methods() const;
/// Returns the parent of this method declaration, which
/// is the class in which this method is defined.
const CXXRecordDecl *getParent() const {
return cast<CXXRecordDecl>(FunctionDecl::getParent());
}
/// Returns the parent of this method declaration, which
/// is the class in which this method is defined.
CXXRecordDecl *getParent() {
return const_cast<CXXRecordDecl *>(
cast<CXXRecordDecl>(FunctionDecl::getParent()));
}
/// \brief Returns the type of the \c this pointer.
///
/// Should only be called for instance (i.e., non-static) methods.
QualType getThisType(ASTContext &C) const;
unsigned getTypeQualifiers() const {
return getType()->getAs<FunctionProtoType>()->getTypeQuals();
}
/// \brief Retrieve the ref-qualifier associated with this method.
///
/// In the following example, \c f() has an lvalue ref-qualifier, \c g()
/// has an rvalue ref-qualifier, and \c h() has no ref-qualifier.
/// @code
/// struct X {
/// void f() &;
/// void g() &&;
/// void h();
/// };
/// @endcode
RefQualifierKind getRefQualifier() const {
return getType()->getAs<FunctionProtoType>()->getRefQualifier();
}
bool hasInlineBody() const;
/// \brief Determine whether this is a lambda closure type's static member
/// function that is used for the result of the lambda's conversion to
/// function pointer (for a lambda with no captures).
///
/// The function itself, if used, will have a placeholder body that will be
/// supplied by IR generation to either forward to the function call operator
/// or clone the function call operator.
bool isLambdaStaticInvoker() const;
/// \brief Find the method in \p RD that corresponds to this one.
///
/// Find if \p RD or one of the classes it inherits from overrides this method.
/// If so, return it. \p RD is assumed to be a subclass of the class defining
/// this method (or be the class itself), unless \p MayBeBase is set to true.
CXXMethodDecl *
getCorrespondingMethodInClass(const CXXRecordDecl *RD,
bool MayBeBase = false);
const CXXMethodDecl *
getCorrespondingMethodInClass(const CXXRecordDecl *RD,
bool MayBeBase = false) const {
return const_cast<CXXMethodDecl *>(this)
->getCorrespondingMethodInClass(RD, MayBeBase);
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstCXXMethod && K <= lastCXXMethod;
}
};
/// \brief Represents a C++ base or member initializer.
///
/// This is part of a constructor initializer that
/// initializes one non-static member variable or one base class. For
/// example, in the following, both 'A(a)' and 'f(3.14159)' are member
/// initializers:
///
/// \code
/// class A { };
/// class B : public A {
/// float f;
/// public:
/// B(A& a) : A(a), f(3.14159) { }
/// };
/// \endcode
class CXXCtorInitializer final {
/// \brief Either the base class name/delegating constructor type (stored as
/// a TypeSourceInfo*), a normal field (FieldDecl), or an anonymous field
/// (IndirectFieldDecl*) being initialized.
llvm::PointerUnion3<TypeSourceInfo *, FieldDecl *, IndirectFieldDecl *>
Initializee;
/// \brief The source location for the field name or, for a base initializer
/// pack expansion, the location of the ellipsis.
///
/// In the case of a delegating
/// constructor, it will still include the type's source location as the
/// Initializee points to the CXXConstructorDecl (to allow loop detection).
SourceLocation MemberOrEllipsisLocation;
/// \brief The argument used to initialize the base or member, which may
/// end up constructing an object (when multiple arguments are involved).
Stmt *Init;
/// \brief Location of the left paren of the ctor-initializer.
SourceLocation LParenLoc;
/// \brief Location of the right paren of the ctor-initializer.
SourceLocation RParenLoc;
/// \brief If the initializee is a type, whether that type makes this
/// a delegating initialization.
unsigned IsDelegating : 1;
/// \brief If the initializer is a base initializer, this keeps track
/// of whether the base is virtual or not.
unsigned IsVirtual : 1;
/// \brief Whether or not the initializer is explicitly written
/// in the sources.
unsigned IsWritten : 1;
/// If IsWritten is true, then this number keeps track of the textual order
/// of this initializer in the original sources, counting from 0.
unsigned SourceOrder : 13;
public:
/// \brief Creates a new base-class initializer.
explicit
CXXCtorInitializer(ASTContext &Context, TypeSourceInfo *TInfo, bool IsVirtual,
SourceLocation L, Expr *Init, SourceLocation R,
SourceLocation EllipsisLoc);
/// \brief Creates a new member initializer.
explicit
CXXCtorInitializer(ASTContext &Context, FieldDecl *Member,
SourceLocation MemberLoc, SourceLocation L, Expr *Init,
SourceLocation R);
/// \brief Creates a new anonymous field initializer.
explicit
CXXCtorInitializer(ASTContext &Context, IndirectFieldDecl *Member,
SourceLocation MemberLoc, SourceLocation L, Expr *Init,
SourceLocation R);
/// \brief Creates a new delegating initializer.
explicit
CXXCtorInitializer(ASTContext &Context, TypeSourceInfo *TInfo,
SourceLocation L, Expr *Init, SourceLocation R);
/// \brief Determine whether this initializer is initializing a base class.
bool isBaseInitializer() const {
return Initializee.is<TypeSourceInfo*>() && !IsDelegating;
}
/// \brief Determine whether this initializer is initializing a non-static
/// data member.
bool isMemberInitializer() const { return Initializee.is<FieldDecl*>(); }
bool isAnyMemberInitializer() const {
return isMemberInitializer() || isIndirectMemberInitializer();
}
bool isIndirectMemberInitializer() const {
return Initializee.is<IndirectFieldDecl*>();
}
/// \brief Determine whether this initializer is an implicit initializer
/// generated for a field with an initializer defined on the member
/// declaration.
///
/// In-class member initializers (also known as "non-static data member
/// initializations", NSDMIs) were introduced in C++11.
bool isInClassMemberInitializer() const {
return Init->getStmtClass() == Stmt::CXXDefaultInitExprClass;
}
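// Illustrative example (hypothetical type): the implicit initialization of
// 'x' in B() below is modeled as a CXXDefaultInitExpr initializer:
//
//   struct B {
//     int x = 42; // in-class (NSDMI) initializer
//     B() {}      // 'x' is initialized from the NSDMI
//   };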
/// \brief Determine whether this initializer is creating a delegating
/// constructor.
bool isDelegatingInitializer() const {
return Initializee.is<TypeSourceInfo*>() && IsDelegating;
}
/// \brief Determine whether this initializer is a pack expansion.
bool isPackExpansion() const {
return isBaseInitializer() && MemberOrEllipsisLocation.isValid();
}
/// \brief For a pack expansion, returns the location of the ellipsis.
SourceLocation getEllipsisLoc() const {
assert(isPackExpansion() && "Initializer is not a pack expansion");
return MemberOrEllipsisLocation;
}
/// If this is a base class initializer, returns the type of the
/// base class with location information. Otherwise, returns a null
/// type location.
TypeLoc getBaseClassLoc() const;
/// If this is a base class initializer, returns the type of the base class.
/// Otherwise, returns null.
const Type *getBaseClass() const;
/// Returns whether the base is virtual or not.
bool isBaseVirtual() const {
assert(isBaseInitializer() && "Must call this on base initializer!");
return IsVirtual;
}
/// \brief Returns the declarator information for a base class or delegating
/// initializer.
TypeSourceInfo *getTypeSourceInfo() const {
return Initializee.dyn_cast<TypeSourceInfo *>();
}
/// \brief If this is a member initializer, returns the declaration of the
/// non-static data member being initialized. Otherwise, returns null.
FieldDecl *getMember() const {
if (isMemberInitializer())
return Initializee.get<FieldDecl*>();
return nullptr;
}
FieldDecl *getAnyMember() const {
if (isMemberInitializer())
return Initializee.get<FieldDecl*>();
if (isIndirectMemberInitializer())
return Initializee.get<IndirectFieldDecl*>()->getAnonField();
return nullptr;
}
IndirectFieldDecl *getIndirectMember() const {
if (isIndirectMemberInitializer())
return Initializee.get<IndirectFieldDecl*>();
return nullptr;
}
SourceLocation getMemberLocation() const {
return MemberOrEllipsisLocation;
}
/// \brief Determine the source location of the initializer.
SourceLocation getSourceLocation() const;
/// \brief Determine the source range covering the entire initializer.
SourceRange getSourceRange() const LLVM_READONLY;
/// \brief Determine whether this initializer is explicitly written
/// in the source code.
bool isWritten() const { return IsWritten; }
/// \brief Return the source position of the initializer, counting from 0.
/// If the initializer was implicit, -1 is returned.
int getSourceOrder() const {
return IsWritten ? static_cast<int>(SourceOrder) : -1;
}
/// \brief Set the source order of this initializer.
///
/// This can only be called once for each initializer; it cannot be called
/// on an initializer having a positive number of (implicit) array indices.
///
/// This assumes that the initializer was written in the source code, and
/// ensures that isWritten() returns true.
void setSourceOrder(int Pos) {
assert(!IsWritten &&
"setSourceOrder() used on implicit initializer");
assert(SourceOrder == 0 &&
"calling twice setSourceOrder() on the same initializer");
assert(Pos >= 0 &&
"setSourceOrder() used to make an initializer implicit");
IsWritten = true;
SourceOrder = static_cast<unsigned>(Pos);
}
SourceLocation getLParenLoc() const { return LParenLoc; }
SourceLocation getRParenLoc() const { return RParenLoc; }
/// \brief Get the initializer.
Expr *getInit() const { return static_cast<Expr*>(Init); }
};
/// Description of a constructor that was inherited from a base class.
class InheritedConstructor {
ConstructorUsingShadowDecl *Shadow;
CXXConstructorDecl *BaseCtor;
public:
InheritedConstructor() : Shadow(), BaseCtor() {}
InheritedConstructor(ConstructorUsingShadowDecl *Shadow,
CXXConstructorDecl *BaseCtor)
: Shadow(Shadow), BaseCtor(BaseCtor) {}
explicit operator bool() const { return Shadow; }
ConstructorUsingShadowDecl *getShadowDecl() const { return Shadow; }
CXXConstructorDecl *getConstructor() const { return BaseCtor; }
};
/// \brief Represents a C++ constructor within a class.
///
/// For example:
///
/// \code
/// class X {
/// public:
/// explicit X(int); // represented by a CXXConstructorDecl.
/// };
/// \endcode
class CXXConstructorDecl final
: public CXXMethodDecl,
private llvm::TrailingObjects<CXXConstructorDecl, InheritedConstructor> {
void anchor() override;
/// \name Support for base and member initializers.
/// \{
/// \brief The arguments used to initialize the base or member.
LazyCXXCtorInitializersPtr CtorInitializers;
unsigned NumCtorInitializers : 31;
/// \}
/// \brief Whether this constructor declaration is an implicitly-declared
/// inheriting constructor.
unsigned IsInheritingConstructor : 1;
CXXConstructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isExplicitSpecified, bool isInline,
bool isImplicitlyDeclared, bool isConstexpr,
InheritedConstructor Inherited)
: CXXMethodDecl(CXXConstructor, C, RD, StartLoc, NameInfo, T, TInfo,
SC_None, isInline, isConstexpr, SourceLocation()),
CtorInitializers(nullptr), NumCtorInitializers(0),
IsInheritingConstructor((bool)Inherited) {
setImplicit(isImplicitlyDeclared);
if (Inherited)
*getTrailingObjects<InheritedConstructor>() = Inherited;
IsExplicitSpecified = isExplicitSpecified;
}
public:
static CXXConstructorDecl *CreateDeserialized(ASTContext &C, unsigned ID,
bool InheritsConstructor);
static CXXConstructorDecl *
Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
bool isExplicit, bool isInline, bool isImplicitlyDeclared,
bool isConstexpr,
InheritedConstructor Inherited = InheritedConstructor());
/// \brief Iterates through the member/base initializer list.
typedef CXXCtorInitializer **init_iterator;
/// \brief Iterates through the member/base initializer list.
typedef CXXCtorInitializer *const *init_const_iterator;
typedef llvm::iterator_range<init_iterator> init_range;
typedef llvm::iterator_range<init_const_iterator> init_const_range;
init_range inits() { return init_range(init_begin(), init_end()); }
init_const_range inits() const {
return init_const_range(init_begin(), init_end());
}
/// \brief Retrieve an iterator to the first initializer.
init_iterator init_begin() {
const auto *ConstThis = this;
return const_cast<init_iterator>(ConstThis->init_begin());
}
/// \brief Retrieve an iterator to the first initializer.
init_const_iterator init_begin() const;
/// \brief Retrieve an iterator past the last initializer.
init_iterator init_end() {
return init_begin() + NumCtorInitializers;
}
/// \brief Retrieve an iterator past the last initializer.
init_const_iterator init_end() const {
return init_begin() + NumCtorInitializers;
}
typedef std::reverse_iterator<init_iterator> init_reverse_iterator;
typedef std::reverse_iterator<init_const_iterator>
init_const_reverse_iterator;
init_reverse_iterator init_rbegin() {
return init_reverse_iterator(init_end());
}
init_const_reverse_iterator init_rbegin() const {
return init_const_reverse_iterator(init_end());
}
init_reverse_iterator init_rend() {
return init_reverse_iterator(init_begin());
}
init_const_reverse_iterator init_rend() const {
return init_const_reverse_iterator(init_begin());
}
/// \brief Determine the number of arguments used to initialize the member
/// or base.
unsigned getNumCtorInitializers() const {
return NumCtorInitializers;
}
void setNumCtorInitializers(unsigned numCtorInitializers) {
NumCtorInitializers = numCtorInitializers;
}
void setCtorInitializers(CXXCtorInitializer **Initializers) {
CtorInitializers = Initializers;
}
/// Whether this function was declared with the 'explicit' specifier.
bool isExplicitSpecified() const { return IsExplicitSpecified; }
/// Whether this function is explicit.
bool isExplicit() const {
return getCanonicalDecl()->isExplicitSpecified();
}
/// \brief Determine whether this constructor is a delegating constructor.
bool isDelegatingConstructor() const {
return (getNumCtorInitializers() == 1) &&
init_begin()[0]->isDelegatingInitializer();
}
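// Illustrative example (hypothetical type): a delegating constructor has a
// single initializer naming the class type itself:
//
//   struct D {
//     D(int n);
//     D() : D(0) {} // delegates to D(int)
//   };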
/// \brief When this constructor delegates to another, retrieve the target.
CXXConstructorDecl *getTargetConstructor() const;
/// Whether this constructor is a default
/// constructor (C++ [class.ctor]p5), which can be used to
/// default-initialize a class of this type.
bool isDefaultConstructor() const;
/// \brief Whether this constructor is a copy constructor (C++ [class.copy]p2),
/// which can be used to copy the class.
///
/// \p TypeQuals will be set to the qualifiers on the
/// argument type. For example, \p TypeQuals would be set to \c
/// Qualifiers::Const for the following copy constructor:
///
/// \code
/// class X {
/// public:
/// X(const X&);
/// };
/// \endcode
bool isCopyConstructor(unsigned &TypeQuals) const;
/// Whether this constructor is a copy
/// constructor (C++ [class.copy]p2), which can be used to copy the
/// class.
bool isCopyConstructor() const {
unsigned TypeQuals = 0;
return isCopyConstructor(TypeQuals);
}
/// \brief Determine whether this constructor is a move constructor
/// (C++11 [class.copy]p3), which can be used to move values of the class.
///
/// \param TypeQuals If this constructor is a move constructor, will be set
/// to the type qualifiers on the referent of the first parameter's type.
bool isMoveConstructor(unsigned &TypeQuals) const;
/// \brief Determine whether this constructor is a move constructor
/// (C++11 [class.copy]p3), which can be used to move values of the class.
bool isMoveConstructor() const {
unsigned TypeQuals = 0;
return isMoveConstructor(TypeQuals);
}
/// \brief Determine whether this is a copy or move constructor.
///
/// \param TypeQuals Will be set to the type qualifiers on the reference
/// parameter, if in fact this is a copy or move constructor.
bool isCopyOrMoveConstructor(unsigned &TypeQuals) const;
/// \brief Determine whether this is a copy or move constructor.
bool isCopyOrMoveConstructor() const {
unsigned Quals;
return isCopyOrMoveConstructor(Quals);
}
/// Whether this constructor is a
/// converting constructor (C++ [class.conv.ctor]), which can be
/// used for user-defined conversions.
bool isConvertingConstructor(bool AllowExplicit) const;
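// Illustrative example (hypothetical type): a non-explicit constructor
// callable with one argument is a converting constructor:
//
//   struct S { S(int); };
//   S s = 42; // implicit conversion via S(int)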
/// \brief Determine whether this is a member template specialization that
/// would copy the object to itself. Such constructors are never used to copy
/// an object.
bool isSpecializationCopyingObject() const;
/// \brief Determine whether this is an implicit constructor synthesized to
/// model a call to a constructor inherited from a base class.
bool isInheritingConstructor() const { return IsInheritingConstructor; }
/// \brief Get the constructor that this inheriting constructor is based on.
InheritedConstructor getInheritedConstructor() const {
return IsInheritingConstructor ? *getTrailingObjects<InheritedConstructor>()
: InheritedConstructor();
}
CXXConstructorDecl *getCanonicalDecl() override {
return cast<CXXConstructorDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXConstructorDecl *getCanonicalDecl() const {
return const_cast<CXXConstructorDecl*>(this)->getCanonicalDecl();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXConstructor; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend TrailingObjects;
};
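// For example, each of the predicates above recognizes one of the
// constructors of the following class (a minimal sketch; the names are
// illustrative):
//
//   struct X {
//     X();                  // isDefaultConstructor()
//     X(const X &);         // isCopyConstructor()
//     X(X &&);              // isMoveConstructor()
//     X(int);               // isConvertingConstructor(/*AllowExplicit=*/false)
//     X(double d) : X() {}  // isDelegatingConstructor(), delegating to X()
//   };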
/// \brief Represents a C++ destructor within a class.
///
/// For example:
///
/// \code
/// class X {
/// public:
/// ~X(); // represented by a CXXDestructorDecl.
/// };
/// \endcode
class CXXDestructorDecl : public CXXMethodDecl {
void anchor() override;
FunctionDecl *OperatorDelete;
CXXDestructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isImplicitlyDeclared)
: CXXMethodDecl(CXXDestructor, C, RD, StartLoc, NameInfo, T, TInfo,
SC_None, isInline, /*isConstexpr=*/false, SourceLocation()),
OperatorDelete(nullptr) {
setImplicit(isImplicitlyDeclared);
}
public:
static CXXDestructorDecl *Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo* TInfo,
bool isInline,
bool isImplicitlyDeclared);
static CXXDestructorDecl *CreateDeserialized(ASTContext & C, unsigned ID);
void setOperatorDelete(FunctionDecl *OD);
const FunctionDecl *getOperatorDelete() const {
return getCanonicalDecl()->OperatorDelete;
}
CXXDestructorDecl *getCanonicalDecl() override {
return cast<CXXDestructorDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXDestructorDecl *getCanonicalDecl() const {
return const_cast<CXXDestructorDecl*>(this)->getCanonicalDecl();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXDestructor; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
/// \brief Represents a C++ conversion function within a class.
///
/// For example:
///
/// \code
/// class X {
/// public:
/// operator bool();
/// };
/// \endcode
class CXXConversionDecl : public CXXMethodDecl {
void anchor() override;
CXXConversionDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
TypeSourceInfo *TInfo, bool isInline,
bool isExplicitSpecified, bool isConstexpr,
SourceLocation EndLocation)
: CXXMethodDecl(CXXConversion, C, RD, StartLoc, NameInfo, T, TInfo,
SC_None, isInline, isConstexpr, EndLocation) {
IsExplicitSpecified = isExplicitSpecified;
}
public:
static CXXConversionDecl *Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isExplicit,
bool isConstexpr,
SourceLocation EndLocation);
static CXXConversionDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Whether this conversion function was explicitly marked \c explicit in the
/// source.
bool isExplicitSpecified() const { return IsExplicitSpecified; }
/// Whether this function is explicit.
bool isExplicit() const {
return getCanonicalDecl()->isExplicitSpecified();
}
/// \brief Returns the type that this conversion function is converting to.
QualType getConversionType() const {
return getType()->getAs<FunctionType>()->getReturnType();
}
/// \brief Determine whether this conversion function is a conversion from
/// a lambda closure type to a block pointer.
bool isLambdaToBlockPointerConversion() const;
CXXConversionDecl *getCanonicalDecl() override {
return cast<CXXConversionDecl>(FunctionDecl::getCanonicalDecl());
}
const CXXConversionDecl *getCanonicalDecl() const {
return const_cast<CXXConversionDecl*>(this)->getCanonicalDecl();
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == CXXConversion; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
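// For example, in the following class (an illustrative sketch), the first
// conversion function has isExplicitSpecified() == true and the second
// does not:
//
//   struct Y {
//     explicit operator bool() const;  // explicit conversion function
//     operator int() const;            // implicit conversion function
//   };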
/// \brief Represents a linkage specification.
///
/// For example:
/// \code
/// extern "C" void foo();
/// \endcode
class LinkageSpecDecl : public Decl, public DeclContext {
virtual void anchor();
public:
/// \brief Represents the language in a linkage specification.
///
/// The values are part of the serialization ABI for
/// ASTs and cannot be changed without altering that ABI. To help
/// ensure a stable ABI for this, we choose the DW_LANG_ encodings
/// from the DWARF standard.
enum LanguageIDs {
lang_c = /* DW_LANG_C */ 0x0002,
lang_cxx = /* DW_LANG_C_plus_plus */ 0x0004
};
private:
/// \brief The language for this linkage specification.
unsigned Language : 3;
/// \brief True if this linkage spec has braces.
///
/// This is needed so that hasBraces() returns the correct result while the
/// linkage spec body is being parsed. Once RBraceLoc has been set this is
/// not used, so it doesn't need to be serialized.
unsigned HasBraces : 1;
/// \brief The source location for the extern keyword.
SourceLocation ExternLoc;
/// \brief The source location for the right brace (if valid).
SourceLocation RBraceLoc;
LinkageSpecDecl(DeclContext *DC, SourceLocation ExternLoc,
SourceLocation LangLoc, LanguageIDs lang, bool HasBraces)
: Decl(LinkageSpec, DC, LangLoc), DeclContext(LinkageSpec),
Language(lang), HasBraces(HasBraces), ExternLoc(ExternLoc),
RBraceLoc(SourceLocation()) { }
public:
static LinkageSpecDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation ExternLoc,
SourceLocation LangLoc, LanguageIDs Lang,
bool HasBraces);
static LinkageSpecDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// \brief Return the language specified by this linkage specification.
LanguageIDs getLanguage() const { return LanguageIDs(Language); }
/// \brief Set the language specified by this linkage specification.
void setLanguage(LanguageIDs L) { Language = L; }
/// \brief Determines whether this linkage specification had braces in
/// its syntactic form.
bool hasBraces() const {
assert(!RBraceLoc.isValid() || HasBraces);
return HasBraces;
}
SourceLocation getExternLoc() const { return ExternLoc; }
SourceLocation getRBraceLoc() const { return RBraceLoc; }
void setExternLoc(SourceLocation L) { ExternLoc = L; }
void setRBraceLoc(SourceLocation L) {
RBraceLoc = L;
HasBraces = RBraceLoc.isValid();
}
SourceLocation getLocEnd() const LLVM_READONLY {
if (hasBraces())
return getRBraceLoc();
// No braces: get the end location of the (only) declaration in context
// (if present).
return decls_empty() ? getLocation() : decls_begin()->getLocEnd();
}
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(ExternLoc, getLocEnd());
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == LinkageSpec; }
static DeclContext *castToDeclContext(const LinkageSpecDecl *D) {
return static_cast<DeclContext *>(const_cast<LinkageSpecDecl*>(D));
}
static LinkageSpecDecl *castFromDeclContext(const DeclContext *DC) {
return static_cast<LinkageSpecDecl *>(const_cast<DeclContext*>(DC));
}
};
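// For example (a sketch of the two syntactic forms), hasBraces() is true
// for the first linkage specification below and false for the second:
//
//   extern "C" {          // braced form; getRBraceLoc() is valid
//     void f();
//     void g();
//   }
//   extern "C" void h();  // unbraced form; contains exactly one declaration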
/// \brief Represents C++ using-directive.
///
/// For example:
/// \code
/// using namespace std;
/// \endcode
///
/// \note UsingDirectiveDecl should be Decl not NamedDecl, but we provide
/// artificial names for all using-directives in order to store
/// them in DeclContext effectively.
class UsingDirectiveDecl : public NamedDecl {
void anchor() override;
/// \brief The location of the \c using keyword.
SourceLocation UsingLoc;
/// \brief The location of the \c namespace keyword.
SourceLocation NamespaceLoc;
/// \brief The nested-name-specifier that precedes the namespace.
NestedNameSpecifierLoc QualifierLoc;
/// \brief The namespace nominated by this using-directive.
NamedDecl *NominatedNamespace;
/// Enclosing context containing both using-directive and nominated
/// namespace.
DeclContext *CommonAncestor;
/// \brief Returns special DeclarationName used by using-directives.
///
/// This is only used by DeclContext for storing UsingDirectiveDecls in
/// its lookup structure.
static DeclarationName getName() {
return DeclarationName::getUsingDirectiveName();
}
UsingDirectiveDecl(DeclContext *DC, SourceLocation UsingLoc,
SourceLocation NamespcLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Nominated,
DeclContext *CommonAncestor)
: NamedDecl(UsingDirective, DC, IdentLoc, getName()), UsingLoc(UsingLoc),
NamespaceLoc(NamespcLoc), QualifierLoc(QualifierLoc),
NominatedNamespace(Nominated), CommonAncestor(CommonAncestor) { }
public:
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace, with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
NamedDecl *getNominatedNamespaceAsWritten() { return NominatedNamespace; }
const NamedDecl *getNominatedNamespaceAsWritten() const {
return NominatedNamespace;
}
/// \brief Returns the namespace nominated by this using-directive.
NamespaceDecl *getNominatedNamespace();
const NamespaceDecl *getNominatedNamespace() const {
return const_cast<UsingDirectiveDecl*>(this)->getNominatedNamespace();
}
/// \brief Returns the common ancestor context of this using-directive and
/// its nominated namespace.
DeclContext *getCommonAncestor() { return CommonAncestor; }
const DeclContext *getCommonAncestor() const { return CommonAncestor; }
/// \brief Return the location of the \c using keyword.
SourceLocation getUsingLoc() const { return UsingLoc; }
// FIXME: Could omit 'Key' in name.
/// \brief Returns the location of the \c namespace keyword.
SourceLocation getNamespaceKeyLocation() const { return NamespaceLoc; }
/// \brief Returns the location of this using declaration's identifier.
SourceLocation getIdentLocation() const { return getLocation(); }
static UsingDirectiveDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
SourceLocation NamespaceLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Nominated,
DeclContext *CommonAncestor);
static UsingDirectiveDecl *CreateDeserialized(ASTContext &C, unsigned ID);
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(UsingLoc, getLocation());
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UsingDirective; }
// Friend for getUsingDirectiveName.
friend class DeclContext;
friend class ASTDeclReader;
};
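// For example, given the following (illustrative) code, the using-directive
// nominates A::B, carries A:: as its qualifier, and has the translation
// unit as the common ancestor of the directive and the nominated namespace:
//
//   namespace A { namespace B { void f(); } }
//   void g() {
//     using namespace A::B;  // getQualifier() yields A::,
//     f();                   // getNominatedNamespace() yields B
//   }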
/// \brief Represents a C++ namespace alias.
///
/// For example:
///
/// \code
/// namespace Foo = Bar;
/// \endcode
class NamespaceAliasDecl : public NamedDecl,
public Redeclarable<NamespaceAliasDecl> {
void anchor() override;
/// \brief The location of the \c namespace keyword.
SourceLocation NamespaceLoc;
/// \brief The location of the namespace's identifier.
///
/// This is accessed by TargetNameLoc.
SourceLocation IdentLoc;
/// \brief The nested-name-specifier that precedes the namespace.
NestedNameSpecifierLoc QualifierLoc;
/// \brief The Decl that this alias points to, either a NamespaceDecl or
/// a NamespaceAliasDecl.
NamedDecl *Namespace;
NamespaceAliasDecl(ASTContext &C, DeclContext *DC,
SourceLocation NamespaceLoc, SourceLocation AliasLoc,
IdentifierInfo *Alias, NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc, NamedDecl *Namespace)
: NamedDecl(NamespaceAlias, DC, AliasLoc, Alias), redeclarable_base(C),
NamespaceLoc(NamespaceLoc), IdentLoc(IdentLoc),
QualifierLoc(QualifierLoc), Namespace(Namespace) {}
typedef Redeclarable<NamespaceAliasDecl> redeclarable_base;
NamespaceAliasDecl *getNextRedeclarationImpl() override;
NamespaceAliasDecl *getPreviousDeclImpl() override;
NamespaceAliasDecl *getMostRecentDeclImpl() override;
friend class ASTDeclReader;
public:
static NamespaceAliasDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation NamespaceLoc,
SourceLocation AliasLoc,
IdentifierInfo *Alias,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Namespace);
static NamespaceAliasDecl *CreateDeserialized(ASTContext &C, unsigned ID);
typedef redeclarable_base::redecl_range redecl_range;
typedef redeclarable_base::redecl_iterator redecl_iterator;
using redeclarable_base::redecls_begin;
using redeclarable_base::redecls_end;
using redeclarable_base::redecls;
using redeclarable_base::getPreviousDecl;
using redeclarable_base::getMostRecentDecl;
NamespaceAliasDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const NamespaceAliasDecl *getCanonicalDecl() const {
return getFirstDecl();
}
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace, with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the
/// name of the namespace.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
/// \brief Retrieve the namespace declaration aliased by this directive.
NamespaceDecl *getNamespace() {
if (NamespaceAliasDecl *AD = dyn_cast<NamespaceAliasDecl>(Namespace))
return AD->getNamespace();
return cast<NamespaceDecl>(Namespace);
}
const NamespaceDecl *getNamespace() const {
return const_cast<NamespaceAliasDecl*>(this)->getNamespace();
}
/// Returns the location of the alias name, i.e. 'foo' in
/// "namespace foo = ns::bar;".
SourceLocation getAliasLoc() const { return getLocation(); }
/// Returns the location of the \c namespace keyword.
SourceLocation getNamespaceLoc() const { return NamespaceLoc; }
/// Returns the location of the identifier in the named namespace.
SourceLocation getTargetNameLoc() const { return IdentLoc; }
/// \brief Retrieve the namespace that this alias refers to, which
/// may either be a NamespaceDecl or a NamespaceAliasDecl.
NamedDecl *getAliasedNamespace() const { return Namespace; }
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(NamespaceLoc, IdentLoc);
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == NamespaceAlias; }
};
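// For example (an illustrative chain of aliases), getNamespace() on the
// alias C below unwraps through B and returns the NamespaceDecl for A,
// while getAliasedNamespace() returns the alias B itself:
//
//   namespace A { }
//   namespace B = A;  // aliases a NamespaceDecl
//   namespace C = B;  // aliases a NamespaceAliasDecl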
/// \brief Represents a shadow declaration introduced into a scope by a
/// (resolved) using declaration.
///
/// For example,
/// \code
/// namespace A {
/// void foo();
/// }
/// namespace B {
/// using A::foo; // <- a UsingDecl
/// // Also creates a UsingShadowDecl for A::foo() in B
/// }
/// \endcode
class UsingShadowDecl : public NamedDecl, public Redeclarable<UsingShadowDecl> {
void anchor() override;
/// The referenced declaration.
NamedDecl *Underlying;
/// \brief The using declaration which introduced this decl or the next using
/// shadow declaration contained in the aforementioned using declaration.
NamedDecl *UsingOrNextShadow;
friend class UsingDecl;
typedef Redeclarable<UsingShadowDecl> redeclarable_base;
UsingShadowDecl *getNextRedeclarationImpl() override {
return getNextRedeclaration();
}
UsingShadowDecl *getPreviousDeclImpl() override {
return getPreviousDecl();
}
UsingShadowDecl *getMostRecentDeclImpl() override {
return getMostRecentDecl();
}
protected:
UsingShadowDecl(Kind K, ASTContext &C, DeclContext *DC, SourceLocation Loc,
UsingDecl *Using, NamedDecl *Target);
UsingShadowDecl(Kind K, ASTContext &C, EmptyShell);
public:
static UsingShadowDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation Loc, UsingDecl *Using,
NamedDecl *Target) {
return new (C, DC) UsingShadowDecl(UsingShadow, C, DC, Loc, Using, Target);
}
static UsingShadowDecl *CreateDeserialized(ASTContext &C, unsigned ID);
typedef redeclarable_base::redecl_range redecl_range;
typedef redeclarable_base::redecl_iterator redecl_iterator;
using redeclarable_base::redecls_begin;
using redeclarable_base::redecls_end;
using redeclarable_base::redecls;
using redeclarable_base::getPreviousDecl;
using redeclarable_base::getMostRecentDecl;
using redeclarable_base::isFirstDecl;
UsingShadowDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const UsingShadowDecl *getCanonicalDecl() const {
return getFirstDecl();
}
/// \brief Gets the underlying declaration which has been brought into the
/// local scope.
NamedDecl *getTargetDecl() const { return Underlying; }
/// \brief Sets the underlying declaration which has been brought into the
/// local scope.
void setTargetDecl(NamedDecl* ND) {
assert(ND && "Target decl is null!");
Underlying = ND;
IdentifierNamespace = ND->getIdentifierNamespace();
}
/// \brief Gets the using declaration to which this declaration is tied.
UsingDecl *getUsingDecl() const;
/// \brief The next using shadow declaration contained in the shadow decl
/// chain of the using declaration which introduced this decl.
UsingShadowDecl *getNextUsingShadowDecl() const {
return dyn_cast_or_null<UsingShadowDecl>(UsingOrNextShadow);
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K == Decl::UsingShadow || K == Decl::ConstructorUsingShadow;
}
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
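// For example, when the name brought in by a using declaration is
// overloaded, each overload gets its own shadow declaration, chained
// together through getNextUsingShadowDecl() (a sketch):
//
//   namespace A {
//     void f(int);
//     void f(double);
//   }
//   namespace B {
//     using A::f;  // one UsingDecl, two UsingShadowDecls in B
//   }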
/// \brief Represents a shadow constructor declaration introduced into a
/// class by a C++11 using-declaration that names a constructor.
///
/// For example:
/// \code
/// struct Base { Base(int); };
/// struct Derived {
/// using Base::Base; // creates a UsingDecl and a ConstructorUsingShadowDecl
/// };
/// \endcode
class ConstructorUsingShadowDecl final : public UsingShadowDecl {
void anchor() override;
/// \brief If this constructor using declaration inherited the constructor
/// from an indirect base class, this is the ConstructorUsingShadowDecl
/// in the named direct base class from which the declaration was inherited.
ConstructorUsingShadowDecl *NominatedBaseClassShadowDecl;
/// \brief If this constructor using declaration inherited the constructor
/// from an indirect base class, this is the ConstructorUsingShadowDecl
/// that will be used to construct the unique direct or virtual base class
/// that receives the constructor arguments.
ConstructorUsingShadowDecl *ConstructedBaseClassShadowDecl;
/// \brief \c true if the constructor ultimately named by this using shadow
/// declaration is within a virtual base class subobject of the class that
/// contains this declaration.
unsigned IsVirtual : 1;
ConstructorUsingShadowDecl(ASTContext &C, DeclContext *DC, SourceLocation Loc,
UsingDecl *Using, NamedDecl *Target,
bool TargetInVirtualBase)
: UsingShadowDecl(ConstructorUsingShadow, C, DC, Loc, Using,
Target->getUnderlyingDecl()),
NominatedBaseClassShadowDecl(
dyn_cast<ConstructorUsingShadowDecl>(Target)),
ConstructedBaseClassShadowDecl(NominatedBaseClassShadowDecl),
IsVirtual(TargetInVirtualBase) {
// If we found a constructor that chains to a constructor for a virtual
// base, we should directly call that virtual base constructor instead.
// FIXME: This logic belongs in Sema.
if (NominatedBaseClassShadowDecl &&
NominatedBaseClassShadowDecl->constructsVirtualBase()) {
ConstructedBaseClassShadowDecl =
NominatedBaseClassShadowDecl->ConstructedBaseClassShadowDecl;
IsVirtual = true;
}
}
ConstructorUsingShadowDecl(ASTContext &C, EmptyShell Empty)
: UsingShadowDecl(ConstructorUsingShadow, C, Empty),
NominatedBaseClassShadowDecl(), ConstructedBaseClassShadowDecl(),
IsVirtual(false) {}
public:
static ConstructorUsingShadowDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation Loc,
UsingDecl *Using, NamedDecl *Target,
bool IsVirtual);
static ConstructorUsingShadowDecl *CreateDeserialized(ASTContext &C,
unsigned ID);
/// Returns the parent of this using shadow declaration, which
/// is the class in which this is declared.
//@{
const CXXRecordDecl *getParent() const {
return cast<CXXRecordDecl>(getDeclContext());
}
CXXRecordDecl *getParent() {
return cast<CXXRecordDecl>(getDeclContext());
}
//@}
/// \brief Get the inheriting constructor declaration for the direct base
/// class from which this using shadow declaration was inherited, if there is
/// one. This can be different for each redeclaration of the same shadow decl.
ConstructorUsingShadowDecl *getNominatedBaseClassShadowDecl() const {
return NominatedBaseClassShadowDecl;
}
/// \brief Get the inheriting constructor declaration for the base class
/// for which we don't have an explicit initializer, if there is one.
ConstructorUsingShadowDecl *getConstructedBaseClassShadowDecl() const {
return ConstructedBaseClassShadowDecl;
}
/// \brief Get the base class that was named in the using declaration. This
/// can be different for each redeclaration of this same shadow decl.
CXXRecordDecl *getNominatedBaseClass() const;
/// \brief Get the base class whose constructor or constructor shadow
/// declaration is passed the constructor arguments.
CXXRecordDecl *getConstructedBaseClass() const {
return cast<CXXRecordDecl>((ConstructedBaseClassShadowDecl
? ConstructedBaseClassShadowDecl
: getTargetDecl())
->getDeclContext());
}
/// \brief Returns \c true if the constructed base class is a virtual base
/// class subobject of this declaration's class.
bool constructsVirtualBase() const {
return IsVirtual;
}
/// \brief Get the constructor or constructor template in the derived class
/// corresponding to this using shadow declaration, if it has been implicitly
/// declared already.
CXXConstructorDecl *getConstructor() const;
void setConstructor(NamedDecl *Ctor);
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == ConstructorUsingShadow; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
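// For example (an illustrative sketch), the using shadow declaration in C
// below nominates the ConstructorUsingShadowDecl in its direct base B,
// which itself inherits the constructor from A:
//
//   struct A { A(int); };
//   struct B : A { using A::A; };
//   struct C : B { using B::B; };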
/// \brief Represents a C++ using-declaration.
///
/// For example:
/// \code
/// using someNameSpace::someIdentifier;
/// \endcode
class UsingDecl : public NamedDecl, public Mergeable<UsingDecl> {
void anchor() override;
/// \brief The source location of the 'using' keyword itself.
SourceLocation UsingLocation;
/// \brief The nested-name-specifier that precedes the name.
NestedNameSpecifierLoc QualifierLoc;
/// \brief Provides source/type location info for the declaration name
/// embedded in the ValueDecl base class.
DeclarationNameLoc DNLoc;
/// \brief The first shadow declaration of the shadow decl chain associated
/// with this using declaration.
///
/// The bool member of the pair stores whether this decl has the \c typename
/// keyword.
llvm::PointerIntPair<UsingShadowDecl *, 1, bool> FirstUsingShadow;
UsingDecl(DeclContext *DC, SourceLocation UL,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo, bool HasTypenameKeyword)
: NamedDecl(Using, DC, NameInfo.getLoc(), NameInfo.getName()),
UsingLocation(UL), QualifierLoc(QualifierLoc),
DNLoc(NameInfo.getInfo()), FirstUsingShadow(nullptr, HasTypenameKeyword) {
}
public:
/// \brief Return the source location of the 'using' keyword.
SourceLocation getUsingLoc() const { return UsingLocation; }
/// \brief Set the source location of the 'using' keyword.
void setUsingLoc(SourceLocation L) { UsingLocation = L; }
/// \brief Retrieve the nested-name-specifier that qualifies the name,
/// with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the name.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
DeclarationNameInfo getNameInfo() const {
return DeclarationNameInfo(getDeclName(), getLocation(), DNLoc);
}
/// \brief Return true if it is a C++03 access declaration (no 'using').
bool isAccessDeclaration() const { return UsingLocation.isInvalid(); }
/// \brief Return true if the using declaration has 'typename'.
bool hasTypename() const { return FirstUsingShadow.getInt(); }
/// \brief Sets whether the using declaration has 'typename'.
void setTypename(bool TN) { FirstUsingShadow.setInt(TN); }
/// \brief Iterates through the using shadow declarations associated with
/// this using declaration.
class shadow_iterator {
/// \brief The current using shadow declaration.
UsingShadowDecl *Current;
public:
typedef UsingShadowDecl* value_type;
typedef UsingShadowDecl* reference;
typedef UsingShadowDecl* pointer;
typedef std::forward_iterator_tag iterator_category;
typedef std::ptrdiff_t difference_type;
shadow_iterator() : Current(nullptr) { }
explicit shadow_iterator(UsingShadowDecl *C) : Current(C) { }
reference operator*() const { return Current; }
pointer operator->() const { return Current; }
shadow_iterator& operator++() {
Current = Current->getNextUsingShadowDecl();
return *this;
}
shadow_iterator operator++(int) {
shadow_iterator tmp(*this);
++(*this);
return tmp;
}
friend bool operator==(shadow_iterator x, shadow_iterator y) {
return x.Current == y.Current;
}
friend bool operator!=(shadow_iterator x, shadow_iterator y) {
return x.Current != y.Current;
}
};
typedef llvm::iterator_range<shadow_iterator> shadow_range;
shadow_range shadows() const {
return shadow_range(shadow_begin(), shadow_end());
}
shadow_iterator shadow_begin() const {
return shadow_iterator(FirstUsingShadow.getPointer());
}
shadow_iterator shadow_end() const { return shadow_iterator(); }
/// \brief Return the number of shadowed declarations associated with this
/// using declaration.
unsigned shadow_size() const {
return std::distance(shadow_begin(), shadow_end());
}
void addShadowDecl(UsingShadowDecl *S);
void removeShadowDecl(UsingShadowDecl *S);
static UsingDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingL,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
bool HasTypenameKeyword);
static UsingDecl *CreateDeserialized(ASTContext &C, unsigned ID);
SourceRange getSourceRange() const override LLVM_READONLY;
/// Retrieves the canonical declaration of this declaration.
UsingDecl *getCanonicalDecl() override { return getFirstDecl(); }
const UsingDecl *getCanonicalDecl() const { return getFirstDecl(); }
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Using; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
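// A minimal sketch of walking the shadow declarations introduced by a
// using declaration, assuming a valid UsingDecl *UD:
//
//   for (UsingShadowDecl *Shadow : UD->shadows()) {
//     NamedDecl *Target = Shadow->getTargetDecl();
//     // ... inspect the underlying declaration ...
//   }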
/// Represents a pack of using declarations that a single
/// using-declarator pack-expanded into.
///
/// \code
/// template<typename ...T> struct X : T... {
/// using T::operator()...;
/// using T::operator T...;
/// };
/// \endcode
///
/// In the second case above, the UsingPackDecl will have the name
/// 'operator T' (which contains an unexpanded pack), but the individual
/// UsingDecls and UsingShadowDecls will have more reasonable names.
class UsingPackDecl final
: public NamedDecl, public Mergeable<UsingPackDecl>,
private llvm::TrailingObjects<UsingPackDecl, NamedDecl *> {
void anchor() override;
/// The UnresolvedUsingValueDecl or UnresolvedUsingTypenameDecl from
/// which this was instantiated.
NamedDecl *InstantiatedFrom;
/// The number of using-declarations created by this pack expansion.
unsigned NumExpansions;
UsingPackDecl(DeclContext *DC, NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> UsingDecls)
: NamedDecl(UsingPack, DC,
InstantiatedFrom ? InstantiatedFrom->getLocation()
: SourceLocation(),
InstantiatedFrom ? InstantiatedFrom->getDeclName()
: DeclarationName()),
InstantiatedFrom(InstantiatedFrom), NumExpansions(UsingDecls.size()) {
std::uninitialized_copy(UsingDecls.begin(), UsingDecls.end(),
getTrailingObjects<NamedDecl *>());
}
public:
/// Get the using declaration from which this was instantiated. This will
/// always be an UnresolvedUsingValueDecl or an UnresolvedUsingTypenameDecl
/// that is a pack expansion.
NamedDecl *getInstantiatedFromUsingDecl() const { return InstantiatedFrom; }
/// Get the set of using declarations that this pack expanded into. Note that
/// some of these may still be unresolved.
ArrayRef<NamedDecl *> expansions() const {
return llvm::makeArrayRef(getTrailingObjects<NamedDecl *>(), NumExpansions);
}
static UsingPackDecl *Create(ASTContext &C, DeclContext *DC,
NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> UsingDecls);
static UsingPackDecl *CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpansions);
SourceRange getSourceRange() const override LLVM_READONLY {
return InstantiatedFrom->getSourceRange();
}
UsingPackDecl *getCanonicalDecl() override { return getFirstDecl(); }
const UsingPackDecl *getCanonicalDecl() const { return getFirstDecl(); }
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UsingPack; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend TrailingObjects;
};
/// \brief Represents a dependent using declaration which was not marked with
/// \c typename.
///
/// Unlike non-dependent using declarations, these *only* bring through
/// non-types; otherwise they would break two-phase lookup.
///
/// \code
/// template \<class T> class A : public Base<T> {
/// using Base<T>::foo;
/// };
/// \endcode
class UnresolvedUsingValueDecl : public ValueDecl,
public Mergeable<UnresolvedUsingValueDecl> {
void anchor() override;
/// \brief The source location of the 'using' keyword
SourceLocation UsingLocation;
/// \brief If this is a pack expansion, the location of the '...'.
SourceLocation EllipsisLoc;
/// \brief The nested-name-specifier that precedes the name.
NestedNameSpecifierLoc QualifierLoc;
/// \brief Provides source/type location info for the declaration name
/// embedded in the ValueDecl base class.
DeclarationNameLoc DNLoc;
UnresolvedUsingValueDecl(DeclContext *DC, QualType Ty,
SourceLocation UsingLoc,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
SourceLocation EllipsisLoc)
: ValueDecl(UnresolvedUsingValue, DC,
NameInfo.getLoc(), NameInfo.getName(), Ty),
UsingLocation(UsingLoc), EllipsisLoc(EllipsisLoc),
QualifierLoc(QualifierLoc), DNLoc(NameInfo.getInfo())
{ }
public:
/// \brief Returns the source location of the 'using' keyword.
SourceLocation getUsingLoc() const { return UsingLocation; }
/// \brief Set the source location of the 'using' keyword.
void setUsingLoc(SourceLocation L) { UsingLocation = L; }
/// \brief Return true if it is a C++03 access declaration (no 'using').
bool isAccessDeclaration() const { return UsingLocation.isInvalid(); }
/// \brief Retrieve the nested-name-specifier that qualifies the name,
/// with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the name.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
DeclarationNameInfo getNameInfo() const {
return DeclarationNameInfo(getDeclName(), getLocation(), DNLoc);
}
/// \brief Determine whether this is a pack expansion.
bool isPackExpansion() const {
return EllipsisLoc.isValid();
}
/// \brief Get the location of the ellipsis if this is a pack expansion.
SourceLocation getEllipsisLoc() const {
return EllipsisLoc;
}
static UnresolvedUsingValueDecl *
Create(ASTContext &C, DeclContext *DC, SourceLocation UsingLoc,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo, SourceLocation EllipsisLoc);
static UnresolvedUsingValueDecl *
CreateDeserialized(ASTContext &C, unsigned ID);
SourceRange getSourceRange() const override LLVM_READONLY;
/// Retrieves the canonical declaration of this declaration.
UnresolvedUsingValueDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const UnresolvedUsingValueDecl *getCanonicalDecl() const {
return getFirstDecl();
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UnresolvedUsingValue; }
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
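// For example (illustrative), the pack-expanded using declaration below
// produces declarations for which isPackExpansion() is true and
// getEllipsisLoc() points at the '...':
//
//   template <class... Bases> struct X : Bases... {
//     using Bases::operator()...;
//   };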
/// \brief Represents a dependent using declaration which was marked with
/// \c typename.
///
/// \code
/// template \<class T> class A : public Base<T> {
/// using typename Base<T>::foo;
/// };
/// \endcode
///
/// The type associated with an unresolved using typename decl is
/// currently always a typename type.
class UnresolvedUsingTypenameDecl
: public TypeDecl,
public Mergeable<UnresolvedUsingTypenameDecl> {
void anchor() override;
/// \brief The source location of the 'typename' keyword
SourceLocation TypenameLocation;
/// \brief If this is a pack expansion, the location of the '...'.
SourceLocation EllipsisLoc;
/// \brief The nested-name-specifier that precedes the name.
NestedNameSpecifierLoc QualifierLoc;
UnresolvedUsingTypenameDecl(DeclContext *DC, SourceLocation UsingLoc,
SourceLocation TypenameLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TargetNameLoc,
IdentifierInfo *TargetName,
SourceLocation EllipsisLoc)
: TypeDecl(UnresolvedUsingTypename, DC, TargetNameLoc, TargetName,
UsingLoc),
TypenameLocation(TypenameLoc), EllipsisLoc(EllipsisLoc),
QualifierLoc(QualifierLoc) { }
friend class ASTDeclReader;
public:
/// \brief Returns the source location of the 'using' keyword.
SourceLocation getUsingLoc() const { return getLocStart(); }
/// \brief Returns the source location of the 'typename' keyword.
SourceLocation getTypenameLoc() const { return TypenameLocation; }
/// \brief Retrieve the nested-name-specifier that qualifies the name,
/// with source-location information.
NestedNameSpecifierLoc getQualifierLoc() const { return QualifierLoc; }
/// \brief Retrieve the nested-name-specifier that qualifies the name.
NestedNameSpecifier *getQualifier() const {
return QualifierLoc.getNestedNameSpecifier();
}
DeclarationNameInfo getNameInfo() const {
return DeclarationNameInfo(getDeclName(), getLocation());
}
/// \brief Determine whether this is a pack expansion.
bool isPackExpansion() const {
return EllipsisLoc.isValid();
}
/// \brief Get the location of the ellipsis if this is a pack expansion.
SourceLocation getEllipsisLoc() const {
return EllipsisLoc;
}
static UnresolvedUsingTypenameDecl *
Create(ASTContext &C, DeclContext *DC, SourceLocation UsingLoc,
SourceLocation TypenameLoc, NestedNameSpecifierLoc QualifierLoc,
SourceLocation TargetNameLoc, DeclarationName TargetName,
SourceLocation EllipsisLoc);
static UnresolvedUsingTypenameDecl *
CreateDeserialized(ASTContext &C, unsigned ID);
/// Retrieves the canonical declaration of this declaration.
UnresolvedUsingTypenameDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const UnresolvedUsingTypenameDecl *getCanonicalDecl() const {
return getFirstDecl();
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == UnresolvedUsingTypename; }
};
/// \brief Represents a C++11 static_assert declaration.
class StaticAssertDecl : public Decl {
virtual void anchor();
llvm::PointerIntPair<Expr *, 1, bool> AssertExprAndFailed;
StringLiteral *Message;
SourceLocation RParenLoc;
StaticAssertDecl(DeclContext *DC, SourceLocation StaticAssertLoc,
Expr *AssertExpr, StringLiteral *Message,
SourceLocation RParenLoc, bool Failed)
: Decl(StaticAssert, DC, StaticAssertLoc),
AssertExprAndFailed(AssertExpr, Failed), Message(Message),
RParenLoc(RParenLoc) { }
public:
static StaticAssertDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation StaticAssertLoc,
Expr *AssertExpr, StringLiteral *Message,
SourceLocation RParenLoc, bool Failed);
static StaticAssertDecl *CreateDeserialized(ASTContext &C, unsigned ID);
Expr *getAssertExpr() { return AssertExprAndFailed.getPointer(); }
const Expr *getAssertExpr() const { return AssertExprAndFailed.getPointer(); }
StringLiteral *getMessage() { return Message; }
const StringLiteral *getMessage() const { return Message; }
bool isFailed() const { return AssertExprAndFailed.getInt(); }
SourceLocation getRParenLoc() const { return RParenLoc; }
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(getLocation(), getRParenLoc());
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == StaticAssert; }
friend class ASTDeclReader;
};
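// For example (a minimal sketch), the declaration below yields a
// StaticAssertDecl whose assert expression is 'sizeof(int) == 4', whose
// message is the string literal, and whose isFailed() result reflects
// whether the condition evaluated to false:
//
//   static_assert(sizeof(int) == 4, "int must be 4 bytes");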
/// A binding in a decomposition declaration. For instance, given:
///
/// \code
/// int n[3];
/// auto &[a, b, c] = n;
/// \endcode
///
/// a, b, and c are BindingDecls, whose bindings are the expressions
/// x[0], x[1], and x[2] respectively, where x is the implicit
/// DecompositionDecl of type 'int (&)[3]'.
class BindingDecl : public ValueDecl {
void anchor() override;
/// The binding represented by this declaration. References to this
/// declaration are effectively equivalent to this expression (except
/// that it is only evaluated once at the point of declaration of the
/// binding).
Expr *Binding;
BindingDecl(DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id)
: ValueDecl(Decl::Binding, DC, IdLoc, Id, QualType()), Binding(nullptr) {}
public:
static BindingDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation IdLoc, IdentifierInfo *Id);
static BindingDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Get the expression to which this declaration is bound. This may be null
/// in two different cases: while parsing the initializer for the
/// decomposition declaration, and when the initializer is type-dependent.
Expr *getBinding() const { return Binding; }
/// Get the variable (if any) that holds the value of evaluating the binding.
/// Only present for user-defined bindings for tuple-like types.
VarDecl *getHoldingVar() const;
/// Set the binding for this BindingDecl, along with its declared type (which
/// should be a possibly-cv-qualified form of the type of the binding, or a
/// reference to such a type).
void setBinding(QualType DeclaredType, Expr *Binding) {
setType(DeclaredType);
this->Binding = Binding;
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Decl::Binding; }
friend class ASTDeclReader;
};
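// For example (illustrative), when decomposing a tuple-like type, each
// binding gets a holding variable that stores the result of the get<I>()
// call, and getHoldingVar() returns it:
//
//   std::tuple<int, float> t;
//   auto [i, f] = t;  // i and f are BindingDecls with holding vars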
/// A decomposition declaration. For instance, given:
///
/// \code
/// int n[3];
/// auto &[a, b, c] = n;
/// \endcode
///
/// the second line declares a DecompositionDecl of type 'int (&)[3]', and
/// three BindingDecls (named a, b, and c). An instance of this class is always
/// unnamed, but behaves in almost all other respects like a VarDecl.
class DecompositionDecl final
: public VarDecl,
private llvm::TrailingObjects<DecompositionDecl, BindingDecl *> {
void anchor() override;
/// The number of BindingDecl*s following this object.
unsigned NumBindings;
DecompositionDecl(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
SourceLocation LSquareLoc, QualType T,
TypeSourceInfo *TInfo, StorageClass SC,
ArrayRef<BindingDecl *> Bindings)
: VarDecl(Decomposition, C, DC, StartLoc, LSquareLoc, nullptr, T, TInfo,
SC),
NumBindings(Bindings.size()) {
std::uninitialized_copy(Bindings.begin(), Bindings.end(),
getTrailingObjects<BindingDecl *>());
}
public:
static DecompositionDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation StartLoc,
SourceLocation LSquareLoc,
QualType T, TypeSourceInfo *TInfo,
StorageClass S,
ArrayRef<BindingDecl *> Bindings);
static DecompositionDecl *CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumBindings);
ArrayRef<BindingDecl *> bindings() const {
return llvm::makeArrayRef(getTrailingObjects<BindingDecl *>(), NumBindings);
}
void printName(raw_ostream &os) const override;
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Decomposition; }
friend TrailingObjects;
friend class ASTDeclReader;
};
/// An instance of this class represents the declaration of a property
/// member. This is a Microsoft extension to C++, first introduced in
/// Visual Studio .NET 2003 as a parallel to similar features in C#
/// and Managed C++.
///
/// A property must always be a non-static class member.
///
/// A property member superficially resembles a non-static data
/// member, except preceded by a property attribute:
///
/// \code
/// __declspec(property(get=GetX, put=PutX)) int x;
/// \endcode
///
/// Either (but not both) of the 'get' and 'put' names may be omitted.
///
/// A reference to a property is always an lvalue. If the lvalue
/// undergoes lvalue-to-rvalue conversion, then a getter name is
/// required, and that member is called with no arguments.
/// If the lvalue is assigned into, then a setter name is required,
/// and that member is called with one argument, the value assigned.
/// Both operations are potentially overloaded. Compound assignments
/// are permitted, as are the increment and decrement operators.
///
/// The getter and putter methods are permitted to be overloaded,
/// although their return and parameter types are subject to certain
/// restrictions according to the type of the property.
///
/// A property declared using an incomplete array type may
/// additionally be subscripted, adding extra parameters to the getter
/// and putter methods.
class MSPropertyDecl : public DeclaratorDecl {
IdentifierInfo *GetterId, *SetterId;
MSPropertyDecl(DeclContext *DC, SourceLocation L, DeclarationName N,
QualType T, TypeSourceInfo *TInfo, SourceLocation StartL,
IdentifierInfo *Getter, IdentifierInfo *Setter)
: DeclaratorDecl(MSProperty, DC, L, N, T, TInfo, StartL),
GetterId(Getter), SetterId(Setter) {}
public:
static MSPropertyDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName N, QualType T,
TypeSourceInfo *TInfo, SourceLocation StartL,
IdentifierInfo *Getter, IdentifierInfo *Setter);
static MSPropertyDecl *CreateDeserialized(ASTContext &C, unsigned ID);
static bool classof(const Decl *D) { return D->getKind() == MSProperty; }
bool hasGetter() const { return GetterId != nullptr; }
IdentifierInfo* getGetterId() const { return GetterId; }
bool hasSetter() const { return SetterId != nullptr; }
IdentifierInfo* getSetterId() const { return SetterId; }
friend class ASTDeclReader;
};
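// For example (a sketch under the -fms-extensions dialect), reads and
// writes of the property x below are rewritten into calls to the named
// getter and setter:
//
//   struct S {
//     int GetX();
//     void PutX(int);
//     __declspec(property(get = GetX, put = PutX)) int x;
//   };
//   // 'int v = s.x;' calls s.GetX(); 's.x = 3;' calls s.PutX(3).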
/// Insertion operator for diagnostics. This allows sending an AccessSpecifier
/// into a diagnostic with <<.
const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
AccessSpecifier AS);
const PartialDiagnostic &operator<<(const PartialDiagnostic &DB,
AccessSpecifier AS);
} // end namespace clang
#endif
Index: head/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h
===================================================================
--- head/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h (revision 322854)
+++ head/contrib/llvm/tools/clang/include/clang/Lex/Preprocessor.h (revision 322855)
@@ -1,2080 +1,2080 @@
//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines the clang::Preprocessor interface.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
#define LLVM_CLANG_LEX_PREPROCESSOR_H
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/PTHLexer.h"
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Registry.h"
#include <memory>
#include <vector>
namespace llvm {
template<unsigned InternalLen> class SmallString;
}
namespace clang {
class SourceManager;
class ExternalPreprocessorSource;
class FileManager;
class FileEntry;
class HeaderSearch;
class MemoryBufferCache;
class PragmaNamespace;
class PragmaHandler;
class CommentHandler;
class ScratchBuffer;
class TargetInfo;
class PPCallbacks;
class CodeCompletionHandler;
class DirectoryLookup;
class PreprocessingRecord;
class ModuleLoader;
class PTHManager;
class PreprocessorOptions;
/// \brief Stores token information for comparing actual tokens with
/// predefined values. Only handles simple tokens and identifiers.
class TokenValue {
tok::TokenKind Kind;
IdentifierInfo *II;
public:
TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
assert(Kind != tok::identifier &&
"Identifiers should be created by TokenValue(IdentifierInfo *)");
assert(!tok::isLiteral(Kind) && "Literals are not supported.");
assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
}
TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
bool operator==(const Token &Tok) const {
return Tok.getKind() == Kind &&
(!II || II == Tok.getIdentifierInfo());
}
};
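// A minimal sketch of matching a lexed token against expected values,
// assuming a Preprocessor &PP and a Token Tok:
//
//   TokenValue LParen(tok::l_paren);
//   TokenValue Pragma(PP.getIdentifierInfo("pragma"));
//   bool Matches = (LParen == Tok) || (Pragma == Tok);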
/// \brief Context in which macro name is used.
enum MacroUse {
MU_Other = 0, // other than #define or #undef
MU_Define = 1, // macro name specified in #define
MU_Undef = 2 // macro name specified in #undef
};
/// \brief Engages in a tight little dance with the lexer to efficiently
/// preprocess tokens.
///
/// Lexers know only about tokens within a single source file, and don't
/// know anything about preprocessor-level issues like the \#include stack,
/// token expansion, etc.
class Preprocessor {
std::shared_ptr<PreprocessorOptions> PPOpts;
DiagnosticsEngine *Diags;
LangOptions &LangOpts;
const TargetInfo *Target;
const TargetInfo *AuxTarget;
FileManager &FileMgr;
SourceManager &SourceMgr;
MemoryBufferCache &PCMCache;
std::unique_ptr<ScratchBuffer> ScratchBuf;
HeaderSearch &HeaderInfo;
ModuleLoader &TheModuleLoader;
/// \brief External source of macros.
ExternalPreprocessorSource *ExternalSource;
/// An optional PTHManager object used for getting tokens from
/// a token cache rather than lexing the original source file.
std::unique_ptr<PTHManager> PTH;
/// A BumpPtrAllocator object used to quickly allocate and release
/// objects internal to the Preprocessor.
llvm::BumpPtrAllocator BP;
/// Identifiers for builtin macros and other builtins.
IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
IdentifierInfo *Ident__COUNTER__; // __COUNTER__
IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
IdentifierInfo *Ident__identifier; // __identifier
IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
IdentifierInfo *Ident__has_feature; // __has_feature
IdentifierInfo *Ident__has_extension; // __has_extension
IdentifierInfo *Ident__has_builtin; // __has_builtin
IdentifierInfo *Ident__has_attribute; // __has_attribute
IdentifierInfo *Ident__has_include; // __has_include
IdentifierInfo *Ident__has_include_next; // __has_include_next
IdentifierInfo *Ident__has_warning; // __has_warning
IdentifierInfo *Ident__is_identifier; // __is_identifier
IdentifierInfo *Ident__building_module; // __building_module
IdentifierInfo *Ident__MODULE__; // __MODULE__
IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
SourceLocation DATELoc, TIMELoc;
unsigned CounterValue; // Next __COUNTER__ value.
enum {
/// \brief Maximum depth of \#includes.
MaxAllowedIncludeStackDepth = 200
};
// State that is set before the preprocessor begins.
bool KeepComments : 1;
bool KeepMacroComments : 1;
bool SuppressIncludeNotFoundError : 1;
// State that changes while the preprocessor runs:
bool InMacroArgs : 1; // True if parsing fn macro invocation args.
/// Whether the preprocessor owns the header search object.
bool OwnsHeaderSearch : 1;
/// True if macro expansion is disabled.
bool DisableMacroExpansion : 1;
/// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
/// when parsing preprocessor directives.
bool MacroExpansionInDirectivesOverride : 1;
class ResetMacroExpansionHelper;
/// \brief Whether we have already loaded macros from the external source.
mutable bool ReadMacrosFromExternalSource : 1;
/// \brief True if pragmas are enabled.
bool PragmasEnabled : 1;
/// \brief True if the current build action is a preprocessing action.
bool PreprocessedOutput : 1;
/// \brief True if we are currently preprocessing a #if or #elif directive.
bool ParsingIfOrElifDirective;
/// \brief True if we are pre-expanding macro arguments.
bool InMacroArgPreExpansion;
/// \brief Mapping/lookup information for all identifiers in
/// the program, including program keywords.
mutable IdentifierTable Identifiers;
/// \brief This table contains all the selectors in the program.
///
/// Unlike IdentifierTable above, this table *isn't* populated by the
/// preprocessor. It is declared/expanded here because its role/lifetime is
/// conceptually similar to the IdentifierTable. In addition, the current
/// control flow (in clang::ParseAST()) makes it convenient to put it here.
///
/// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
/// the lifetime of the preprocessor.
SelectorTable Selectors;
/// \brief Information about builtins.
Builtin::Context BuiltinInfo;
/// \brief Tracks all of the pragmas that the client registered
/// with this preprocessor.
std::unique_ptr<PragmaNamespace> PragmaHandlers;
/// \brief Pragma handlers of the original source is stored here during the
/// parsing of a model file.
std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
/// \brief Tracks all of the comment handlers that the client registered
/// with this preprocessor.
std::vector<CommentHandler *> CommentHandlers;
/// \brief True if we want to ignore the EOF token and continue later on (thus
/// avoiding tearing down the Lexer, etc.).
bool IncrementalProcessing;
/// The kind of translation unit we are processing.
TranslationUnitKind TUKind;
/// \brief The code-completion handler.
CodeCompletionHandler *CodeComplete;
/// \brief The file that we're performing code-completion for, if any.
const FileEntry *CodeCompletionFile;
/// \brief The offset in file for the code-completion point.
unsigned CodeCompletionOffset;
/// \brief The location for the code-completion point. This gets instantiated
/// when the CodeCompletionFile gets \#include'ed for preprocessing.
SourceLocation CodeCompletionLoc;
/// \brief The start location for the file of the code-completion point.
///
/// This gets instantiated when the CodeCompletionFile gets \#include'ed
/// for preprocessing.
SourceLocation CodeCompletionFileLoc;
/// \brief The source location of the \c import contextual keyword we just
/// lexed, if any.
SourceLocation ModuleImportLoc;
/// \brief The module import path that we're currently processing.
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
/// \brief Whether the last token we lexed was an '@'.
bool LastTokenWasAt;
/// \brief Whether the module import expects an identifier next. Otherwise,
/// it expects a '.' or ';'.
bool ModuleImportExpectsIdentifier;
/// \brief The source location of the currently-active
/// \#pragma clang arc_cf_code_audited begin.
SourceLocation PragmaARCCFCodeAuditedLoc;
/// \brief The source location of the currently-active
/// \#pragma clang assume_nonnull begin.
SourceLocation PragmaAssumeNonNullLoc;
/// \brief True if we hit the code-completion point.
bool CodeCompletionReached;
/// \brief The code completion token containing the information
/// on the stem that is to be code completed.
IdentifierInfo *CodeCompletionII;
/// \brief The directory that the main file should be considered to occupy,
/// if it does not correspond to a real file (as happens when building a
/// module).
const DirectoryEntry *MainFileDir;
/// \brief The number of bytes that we will initially skip when entering the
/// main file, along with a flag that indicates whether skipping this number
/// of bytes will place the lexer at the start of a line.
///
/// This is used when loading a precompiled preamble.
std::pair<int, bool> SkipMainFilePreamble;
class PreambleConditionalStackStore {
enum State {
Off = 0,
Recording = 1,
Replaying = 2,
};
public:
PreambleConditionalStackStore() : ConditionalStackState(Off) {}
void startRecording() { ConditionalStackState = Recording; }
void startReplaying() { ConditionalStackState = Replaying; }
bool isRecording() const { return ConditionalStackState == Recording; }
bool isReplaying() const { return ConditionalStackState == Replaying; }
ArrayRef<PPConditionalInfo> getStack() const {
return ConditionalStack;
}
void doneReplaying() {
ConditionalStack.clear();
ConditionalStackState = Off;
}
void setStack(ArrayRef<PPConditionalInfo> s) {
if (!isRecording() && !isReplaying())
return;
ConditionalStack.clear();
ConditionalStack.append(s.begin(), s.end());
}
bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
private:
SmallVector<PPConditionalInfo, 4> ConditionalStack;
State ConditionalStackState;
} PreambleConditionalStack;
/// \brief The current top of the stack that we're lexing from if
/// not expanding a macro and we are lexing directly from source code.
///
/// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
std::unique_ptr<Lexer> CurLexer;
/// \brief The current top of stack that we're lexing from if
/// not expanding from a macro and we are lexing from a PTH cache.
///
/// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
std::unique_ptr<PTHLexer> CurPTHLexer;
/// \brief The current top of the stack that we're lexing from
/// if not expanding a macro.
///
/// This is an alias for either CurLexer or CurPTHLexer.
PreprocessorLexer *CurPPLexer;
/// \brief Used to find the current FileEntry, if CurLexer is non-null
/// and if applicable.
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
const DirectoryLookup *CurDirLookup;
/// \brief The current macro we are expanding, if we are expanding a macro.
///
/// One of CurLexer and CurTokenLexer must be null.
std::unique_ptr<TokenLexer> CurTokenLexer;
/// \brief The kind of lexer we're currently working with.
enum CurLexerKind {
CLK_Lexer,
CLK_PTHLexer,
CLK_TokenLexer,
CLK_CachingLexer,
CLK_LexAfterModuleImport
} CurLexerKind;
/// \brief If the current lexer is for a submodule that is being built, this
/// is that submodule.
Module *CurLexerSubmodule;
/// \brief Keeps track of the stack of files currently
/// \#included, and macros currently being expanded from, not counting
/// CurLexer/CurTokenLexer.
struct IncludeStackInfo {
enum CurLexerKind CurLexerKind;
Module *TheSubmodule;
std::unique_ptr<Lexer> TheLexer;
std::unique_ptr<PTHLexer> ThePTHLexer;
PreprocessorLexer *ThePPLexer;
std::unique_ptr<TokenLexer> TheTokenLexer;
const DirectoryLookup *TheDirLookup;
// The following constructors are completely useless copies of the default
// versions, only needed to pacify MSVC.
IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
std::unique_ptr<Lexer> &&TheLexer,
std::unique_ptr<PTHLexer> &&ThePTHLexer,
PreprocessorLexer *ThePPLexer,
std::unique_ptr<TokenLexer> &&TheTokenLexer,
const DirectoryLookup *TheDirLookup)
: CurLexerKind(std::move(CurLexerKind)),
TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
ThePTHLexer(std::move(ThePTHLexer)),
ThePPLexer(std::move(ThePPLexer)),
TheTokenLexer(std::move(TheTokenLexer)),
TheDirLookup(std::move(TheDirLookup)) {}
};
std::vector<IncludeStackInfo> IncludeMacroStack;
/// \brief Actions invoked when some preprocessor activity is
/// encountered (e.g. a file is \#included, etc).
std::unique_ptr<PPCallbacks> Callbacks;
struct MacroExpandsInfo {
Token Tok;
MacroDefinition MD;
SourceRange Range;
MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
: Tok(Tok), MD(MD), Range(Range) { }
};
SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
/// Information about a name that has been used to define a module macro.
struct ModuleMacroInfo {
ModuleMacroInfo(MacroDirective *MD)
: MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {}
/// The most recent macro directive for this identifier.
MacroDirective *MD;
/// The active module macros for this identifier.
llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros;
/// The generation number at which we last updated ActiveModuleMacros.
/// \see Preprocessor::VisibleModules.
unsigned ActiveModuleMacrosGeneration;
/// Whether this macro name is ambiguous.
bool IsAmbiguous;
/// The module macros that are overridden by this macro.
llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros;
};
/// The state of a macro for an identifier.
class MacroState {
mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
const IdentifierInfo *II) const {
if (II->isOutOfDate())
PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
// FIXME: Find a spare bit on IdentifierInfo and store a
// HasModuleMacros flag.
if (!II->hasMacroDefinition() ||
(!PP.getLangOpts().Modules &&
!PP.getLangOpts().ModulesLocalVisibility) ||
!PP.CurSubmoduleState->VisibleModules.getGeneration())
return nullptr;
auto *Info = State.dyn_cast<ModuleMacroInfo*>();
if (!Info) {
Info = new (PP.getPreprocessorAllocator())
ModuleMacroInfo(State.get<MacroDirective *>());
State = Info;
}
if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
Info->ActiveModuleMacrosGeneration)
PP.updateModuleMacroInfo(II, *Info);
return Info;
}
public:
MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective *MD) : State(MD) {}
MacroState(MacroState &&O) noexcept : State(O.State) {
O.State = (MacroDirective *)nullptr;
}
MacroState &operator=(MacroState &&O) noexcept {
auto S = O.State;
O.State = (MacroDirective *)nullptr;
State = S;
return *this;
}
~MacroState() {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
Info->~ModuleMacroInfo();
}
MacroDirective *getLatest() const {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
return Info->MD;
return State.get<MacroDirective*>();
}
void setLatest(MacroDirective *MD) {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
Info->MD = MD;
else
State = MD;
}
bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
auto *Info = getModuleInfo(PP, II);
return Info ? Info->IsAmbiguous : false;
}
ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
if (auto *Info = getModuleInfo(PP, II))
return Info->ActiveModuleMacros;
return None;
}
MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
SourceManager &SourceMgr) const {
// FIXME: Incorporate module macros into the result of this.
if (auto *Latest = getLatest())
return Latest->findDirectiveAtLoc(Loc, SourceMgr);
return MacroDirective::DefInfo();
}
void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
if (auto *Info = getModuleInfo(PP, II)) {
Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
Info->ActiveModuleMacros.begin(),
Info->ActiveModuleMacros.end());
Info->ActiveModuleMacros.clear();
Info->IsAmbiguous = false;
}
}
ArrayRef<ModuleMacro*> getOverriddenMacros() const {
if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
return Info->OverriddenMacros;
return None;
}
void setOverriddenMacros(Preprocessor &PP,
ArrayRef<ModuleMacro *> Overrides) {
auto *Info = State.dyn_cast<ModuleMacroInfo*>();
if (!Info) {
if (Overrides.empty())
return;
Info = new (PP.getPreprocessorAllocator())
ModuleMacroInfo(State.get<MacroDirective *>());
State = Info;
}
Info->OverriddenMacros.clear();
Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
Overrides.begin(), Overrides.end());
Info->ActiveModuleMacrosGeneration = 0;
}
};
/// For each IdentifierInfo that was associated with a macro, we
/// keep a mapping to the history of all macro definitions and #undefs in
/// reverse order (the latest one is at the head of the list).
///
/// This mapping lives within the \p CurSubmoduleState.
typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap;
friend class ASTReader;
struct SubmoduleState;
/// \brief Information about a submodule that we're currently building.
struct BuildingSubmoduleInfo {
BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
SubmoduleState *OuterSubmoduleState,
unsigned OuterPendingModuleMacroNames)
: M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
OuterSubmoduleState(OuterSubmoduleState),
OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
/// The module that we are building.
Module *M;
/// The location at which the module was included.
SourceLocation ImportLoc;
/// Whether we entered this submodule via a pragma.
bool IsPragma;
/// The previous SubmoduleState.
SubmoduleState *OuterSubmoduleState;
/// The number of pending module macro names when we started building this.
unsigned OuterPendingModuleMacroNames;
};
SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
/// \brief Information about a submodule's preprocessor state.
struct SubmoduleState {
/// The macros for the submodule.
MacroMap Macros;
/// The set of modules that are visible within the submodule.
VisibleModuleSet VisibleModules;
// FIXME: CounterValue?
// FIXME: PragmaPushMacroInfo?
};
std::map<Module*, SubmoduleState> Submodules;
/// The preprocessor state for preprocessing outside of any submodule.
SubmoduleState NullSubmoduleState;
/// The current submodule state. Will be \p NullSubmoduleState if we're not
/// in a submodule.
SubmoduleState *CurSubmoduleState;
/// The set of known macros exported from modules.
llvm::FoldingSet<ModuleMacro> ModuleMacros;
/// The names of potential module macros that we've not yet processed.
llvm::SmallVector<const IdentifierInfo*, 32> PendingModuleMacroNames;
/// The list of module macros, for each identifier, that are not overridden by
/// any other module macro.
llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>>
LeafModuleMacros;
/// \brief Macros that we want to warn about because they are unused at the
/// end of the translation unit.
///
/// We store just their SourceLocations instead of
/// something like MacroInfo*. The benefit of this is that when we are
/// deserializing from PCH, we don't need to deserialize identifiers and
/// macros just to report that they are unused; we just warn using
/// the SourceLocations in this set (which will be filled by the ASTReader).
/// We use a SmallPtrSet instead of a vector for faster removal.
typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
/// \brief A "freelist" of MacroArg objects that can be
/// reused for quick allocation.
MacroArgs *MacroArgCache;
friend class MacroArgs;
/// For each IdentifierInfo used in a \#pragma push_macro directive,
/// we keep a MacroInfo stack used to restore the previous macro value.
llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
// Various statistics we track for performance analysis.
unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
unsigned NumIf, NumElse, NumEndif;
unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
unsigned NumSkipped;
/// \brief The predefined macros that the preprocessor should use from the
/// command line, etc.
std::string Predefines;
/// \brief The file ID for the preprocessor predefines.
FileID PredefinesFileID;
/// \{
/// \brief Cache of macro expanders to reduce malloc traffic.
enum { TokenLexerCacheSize = 8 };
unsigned NumCachedTokenLexers;
std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
/// \}
/// \brief Keeps macro-expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro-expanded tokens that it
/// is going to lex into the cache, and when it finishes, the tokens are
/// removed from the end of the cache.
SmallVector<Token, 16> MacroExpandedTokens;
std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
/// \brief A record of the macro definitions and expansions that
/// occurred during preprocessing.
///
/// This is an optional side structure that can be enabled with
/// \c createPreprocessingRecord() prior to preprocessing.
PreprocessingRecord *Record;
/// Cached tokens state.
typedef SmallVector<Token, 1> CachedTokensTy;
/// \brief Cached tokens are stored here when we do backtracking or
/// lookahead. They are "lexed" by the CachingLex() method.
CachedTokensTy CachedTokens;
/// \brief The position of the cached token that CachingLex() should
/// "lex" next.
///
/// If it points beyond the CachedTokens vector, it means that a normal
/// Lex() should be invoked.
CachedTokensTy::size_type CachedLexPos;
/// \brief Stack of backtrack positions, allowing nested backtracks.
///
/// The EnableBacktrackAtThisPos() method pushes a position to
/// indicate where CachedLexPos should be set when the BackTrack() method is
/// invoked (at which point the last position is popped).
std::vector<CachedTokensTy::size_type> BacktrackPositions;
struct MacroInfoChain {
MacroInfo MI;
MacroInfoChain *Next;
};
/// MacroInfos are managed as a chain for easy disposal. This is the head
/// of that list.
MacroInfoChain *MIChainHead;
void updateOutOfDateIdentifier(IdentifierInfo &II) const;
public:
Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
MemoryBufferCache &PCMCache,
HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
IdentifierInfoLookup *IILookup = nullptr,
bool OwnsHeaderSearch = false,
TranslationUnitKind TUKind = TU_Complete);
~Preprocessor();
/// \brief Initialize the preprocessor using information about the target.
///
/// \param Target is owned by the caller and must remain valid for the
/// lifetime of the preprocessor.
/// \param AuxTarget is owned by the caller and must remain valid for
/// the lifetime of the preprocessor.
void Initialize(const TargetInfo &Target,
const TargetInfo *AuxTarget = nullptr);
/// \brief Initialize the preprocessor to parse a model file.
///
/// To parse model files the preprocessor of the original source is reused to
/// preserve the identifier table. However, to avoid duplicating some
/// information in the preprocessor, some cleanup is needed before it is used
/// to parse model files. This method does that cleanup.
void InitializeForModelFile();
/// \brief Clean up after model file parsing.
void FinalizeForModelFile();
/// \brief Retrieve the preprocessor options used to initialize this
/// preprocessor.
PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
DiagnosticsEngine &getDiagnostics() const { return *Diags; }
void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
const LangOptions &getLangOpts() const { return LangOpts; }
const TargetInfo &getTargetInfo() const { return *Target; }
const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
FileManager &getFileManager() const { return FileMgr; }
SourceManager &getSourceManager() const { return SourceMgr; }
MemoryBufferCache &getPCMCache() const { return PCMCache; }
HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
IdentifierTable &getIdentifierTable() { return Identifiers; }
const IdentifierTable &getIdentifierTable() const { return Identifiers; }
SelectorTable &getSelectorTable() { return Selectors; }
Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
void setPTHManager(PTHManager* pm);
PTHManager *getPTHManager() { return PTH.get(); }
void setExternalSource(ExternalPreprocessorSource *Source) {
ExternalSource = Source;
}
ExternalPreprocessorSource *getExternalSource() const {
return ExternalSource;
}
/// \brief Retrieve the module loader associated with this preprocessor.
ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
bool hadModuleLoaderFatalFailure() const {
return TheModuleLoader.HadFatalFailure;
}
/// \brief True if we are currently preprocessing a \#if or \#elif directive.
bool isParsingIfOrElifDirective() const {
return ParsingIfOrElifDirective;
}
/// \brief Control whether the preprocessor retains comments in output.
void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
this->KeepComments = KeepComments | KeepMacroComments;
this->KeepMacroComments = KeepMacroComments;
}
bool getCommentRetentionState() const { return KeepComments; }
void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
bool getPragmasEnabled() const { return PragmasEnabled; }
void SetSuppressIncludeNotFoundError(bool Suppress) {
SuppressIncludeNotFoundError = Suppress;
}
bool GetSuppressIncludeNotFoundError() {
return SuppressIncludeNotFoundError;
}
/// Sets whether the preprocessor is responsible for producing output or if
/// it is producing tokens to be consumed by Parse and Sema.
void setPreprocessedOutput(bool IsPreprocessedOutput) {
PreprocessedOutput = IsPreprocessedOutput;
}
/// Returns true if the preprocessor is responsible for generating output,
/// false if it is producing tokens to be consumed by Parse and Sema.
bool isPreprocessedOutput() const { return PreprocessedOutput; }
/// \brief Return true if we are lexing directly from the specified lexer.
bool isCurrentLexer(const PreprocessorLexer *L) const {
return CurPPLexer == L;
}
/// \brief Return the current lexer being lexed from.
///
/// Note that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.
PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
/// \brief Return the current file lexer being lexed from.
///
/// Note that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.
PreprocessorLexer *getCurrentFileLexer() const;
/// \brief Return the submodule owning the file being lexed. This may not be
/// the current module if we have changed modules since entering the file.
Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
/// \brief Returns the FileID for the preprocessor predefines.
FileID getPredefinesFileID() const { return PredefinesFileID; }
/// \{
/// \brief Accessors for preprocessor callbacks.
///
/// Note that this class takes ownership of any PPCallbacks object given to
/// it.
PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
if (Callbacks)
C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
std::move(Callbacks));
Callbacks = std::move(C);
}
/// \}
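/// Illustrative usage (a sketch, not part of the original header): a client
/// can observe preprocessor activity by registering a PPCallbacks subclass;
/// MyCallbacks is hypothetical.
/// \code
///   struct MyCallbacks : public PPCallbacks {
///     void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD,
///                       SourceRange Range, const MacroArgs *Args) override {
///       // Inspect each macro expansion here.
///     }
///   };
///   PP.addPPCallbacks(llvm::make_unique<MyCallbacks>());
/// \endcode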
bool isMacroDefined(StringRef Id) {
return isMacroDefined(&Identifiers.get(Id));
}
bool isMacroDefined(const IdentifierInfo *II) {
return II->hasMacroDefinition() &&
(!getLangOpts().Modules || (bool)getMacroDefinition(II));
}
/// \brief Determine whether II is defined as a macro within the module M,
/// if that is a module that we've already preprocessed. Does not check for
/// macros imported into M.
bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
if (!II->hasMacroDefinition())
return false;
auto I = Submodules.find(M);
if (I == Submodules.end())
return false;
auto J = I->second.Macros.find(II);
if (J == I->second.Macros.end())
return false;
auto *MD = J->second.getLatest();
return MD && MD->isDefined();
}
MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
if (!II->hasMacroDefinition())
return MacroDefinition();
MacroState &S = CurSubmoduleState->Macros[II];
auto *MD = S.getLatest();
while (MD && isa<VisibilityMacroDirective>(MD))
MD = MD->getPrevious();
return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
S.getActiveModuleMacros(*this, II),
S.isAmbiguous(*this, II));
}
MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
SourceLocation Loc) {
if (!II->hadMacroDefinition())
return MacroDefinition();
MacroState &S = CurSubmoduleState->Macros[II];
MacroDirective::DefInfo DI;
if (auto *MD = S.getLatest())
DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
// FIXME: Compute the set of active module macros at the specified location.
return MacroDefinition(DI.getDirective(),
S.getActiveModuleMacros(*this, II),
S.isAmbiguous(*this, II));
}
/// \brief Given an identifier, return its latest non-imported MacroDirective
/// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
if (!II->hasMacroDefinition())
return nullptr;
auto *MD = getLocalMacroDirectiveHistory(II);
if (!MD || MD->getDefinition().isUndefined())
return nullptr;
return MD;
}
const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
return const_cast<Preprocessor*>(this)->getMacroInfo(II);
}
MacroInfo *getMacroInfo(const IdentifierInfo *II) {
if (!II->hasMacroDefinition())
return nullptr;
if (auto MD = getMacroDefinition(II))
return MD.getMacroInfo();
return nullptr;
}
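/// Illustrative usage (a sketch, not part of the original header): querying
/// a macro by name, assuming a Preprocessor &PP; "MY_MACRO" is hypothetical.
/// \code
///   IdentifierInfo *II = PP.getIdentifierInfo("MY_MACRO");
///   if (PP.isMacroDefined(II))
///     if (const MacroInfo *MI = PP.getMacroInfo(II))
///       unsigned NumToks = MI->getNumTokens(); // inspect the definition
/// \endcode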
/// \brief Given an identifier, return the latest non-imported macro
/// directive for that identifier.
///
/// One can iterate over all previous macro directives from the most recent
/// one.
MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
/// \brief Add a directive to the macro directive history for this identifier.
void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
SourceLocation Loc) {
DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
appendMacroDirective(II, MD);
return MD;
}
DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
MacroInfo *MI) {
return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
}
/// \brief Set a MacroDirective that was loaded from a PCH file.
void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
MacroDirective *MD);
/// \brief Register an exported macro for a module and identifier.
ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
/// \brief Get the list of leaf (non-overridden) module macros for a name.
ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
if (II->isOutOfDate())
updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
auto I = LeafModuleMacros.find(II);
if (I != LeafModuleMacros.end())
return I->second;
return None;
}
/// \{
/// Iterators for the macro history table. Currently defined macros have
/// IdentifierInfo::hasMacroDefinition() set and an empty
/// MacroInfo::getUndefLoc() at the head of the list.
typedef MacroMap::const_iterator macro_iterator;
macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
macro_iterator macro_end(bool IncludeExternalMacros = true) const;
llvm::iterator_range<macro_iterator>
macros(bool IncludeExternalMacros = true) const {
return llvm::make_range(macro_begin(IncludeExternalMacros),
macro_end(IncludeExternalMacros));
}
/// \}
/// \brief Return the name of the macro defined before \p Loc that has
/// spelling \p Tokens. If there are multiple macros with the same spelling,
/// return the last one defined.
StringRef getLastMacroWithSpelling(SourceLocation Loc,
ArrayRef<TokenValue> Tokens) const;
const std::string &getPredefines() const { return Predefines; }
/// \brief Set the predefines for this Preprocessor.
///
/// These predefines are automatically injected when parsing the main file.
void setPredefines(const char *P) { Predefines = P; }
void setPredefines(StringRef P) { Predefines = P; }
/// Return information about the specified preprocessor
/// identifier token.
IdentifierInfo *getIdentifierInfo(StringRef Name) const {
return &Identifiers.get(Name);
}
/// \brief Add the specified pragma handler to this preprocessor.
///
/// If \p Namespace is non-null, then it is a token required to exist on the
/// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
void AddPragmaHandler(PragmaHandler *Handler) {
AddPragmaHandler(StringRef(), Handler);
}
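/// Illustrative usage (a sketch, not part of the original header): handling
/// a hypothetical '#pragma clang my_pragma'; MyPragmaHandler and the pragma
/// name are invented for illustration.
/// \code
///   struct MyPragmaHandler : public PragmaHandler {
///     MyPragmaHandler() : PragmaHandler("my_pragma") {}
///     void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
///                       Token &FirstToken) override {
///       // Consume the pragma's tokens up to tok::eod here.
///     }
///   };
///   PP.AddPragmaHandler("clang", new MyPragmaHandler());
/// \endcode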
/// \brief Remove the specific pragma handler from this preprocessor.
///
/// If \p Namespace is non-null, then it should be the namespace that
/// \p Handler was added to. It is an error to remove a handler that
/// has not been registered.
void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
void RemovePragmaHandler(PragmaHandler *Handler) {
RemovePragmaHandler(StringRef(), Handler);
}
/// Install empty handlers for all pragmas (making them ignored).
void IgnorePragmas();
/// \brief Add the specified comment handler to the preprocessor.
void addCommentHandler(CommentHandler *Handler);
/// \brief Remove the specified comment handler.
///
/// It is an error to remove a handler that has not been registered.
void removeCommentHandler(CommentHandler *Handler);
/// \brief Set the code completion handler to the given object.
void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
CodeComplete = &Handler;
}
/// \brief Retrieve the current code-completion handler.
CodeCompletionHandler *getCodeCompletionHandler() const {
return CodeComplete;
}
/// \brief Clear out the code completion handler.
void clearCodeCompletionHandler() {
CodeComplete = nullptr;
}
/// \brief Hook used by the lexer to invoke the "natural language" code
/// completion point.
void CodeCompleteNaturalLanguage();
/// \brief Set the code completion token for filtering purposes.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
CodeCompletionII = Filter;
}
/// \brief Get the code completion token for filtering purposes.
StringRef getCodeCompletionFilter() {
if (CodeCompletionII)
return CodeCompletionII->getName();
return {};
}
/// \brief Retrieve the preprocessing record, or NULL if there is no
/// preprocessing record.
PreprocessingRecord *getPreprocessingRecord() const { return Record; }
/// \brief Create a new preprocessing record, which will keep track of
/// all macro expansions, macro definitions, etc.
void createPreprocessingRecord();
/// \brief Enter the specified FileID as the main source file,
/// which implicitly adds the builtin defines etc.
void EnterMainSourceFile();
- /// \brief After parser warm-up, initialize the conditional stack from
- /// the preamble.
- void replayPreambleConditionalStack();
-
/// \brief Inform the preprocessor callbacks that processing is complete.
void EndSourceFile();
/// \brief Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
///
/// Emits a diagnostic, doesn't enter the file, and returns true on error.
bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
SourceLocation Loc);
/// \brief Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
///
/// \param Args specifies the tokens input to a function-like macro.
/// \param ILEnd specifies the location of the ')' for a function-like macro
/// or the identifier for an object-like macro.
void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
MacroArgs *Args);
/// \brief Add a "macro" context to the top of the include stack,
/// which will cause the lexer to start returning the specified tokens.
///
/// If \p DisableMacroExpansion is true, tokens lexed from the token stream
/// will not be subject to further macro expansion. Otherwise, these tokens
/// will be re-macro-expanded when/if expansion is enabled.
///
/// If \p OwnsTokens is false, this method assumes that the specified stream
/// of tokens has a permanent owner somewhere, so they do not need to be
/// copied. If it is true, it assumes the array of tokens is allocated with
/// \c new[] and the Preprocessor will delete[] it.
private:
void EnterTokenStream(const Token *Toks, unsigned NumToks,
bool DisableMacroExpansion, bool OwnsTokens);
public:
void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
bool DisableMacroExpansion) {
EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true);
}
void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false);
}
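/// Illustrative usage (a sketch, not part of the original header): replaying
/// a caller-owned token buffer. Toks is hypothetical and must outlive the
/// replay, since the ArrayRef overload does not take ownership.
/// \code
///   SmallVector<Token, 4> Toks; // filled elsewhere by the caller
///   PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
/// \endcode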
/// \brief Pop the current lexer/macro expansion off the top of the lexer stack.
///
/// This should only be used in situations where the current state of the
/// top-of-stack lexer is known.
void RemoveTopOfLexerStack();
/// From the point that this method is called, and until
/// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
/// keeps track of the lexed tokens so that a subsequent Backtrack() call will
/// make the Preprocessor re-lex the same tokens.
///
/// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
/// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
/// be combined with the EnableBacktrackAtThisPos calls in reverse order.
///
/// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
/// at some point after EnableBacktrackAtThisPos. If you don't, caching of
/// tokens will continue indefinitely.
///
void EnableBacktrackAtThisPos();
/// \brief Disable the last EnableBacktrackAtThisPos call.
void CommitBacktrackedTokens();
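/// Illustrative usage (a sketch, not part of the original header): tentative
/// lexing with a paired commit/backtrack, assuming a Preprocessor &PP.
/// \code
///   PP.EnableBacktrackAtThisPos();
///   Token Tok;
///   PP.Lex(Tok);
///   if (Tok.is(tok::l_paren))
///     PP.CommitBacktrackedTokens(); // keep the tokens we lexed
///   else
///     PP.Backtrack();               // the same tokens will be re-lexed
/// \endcode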
struct CachedTokensRange {
CachedTokensTy::size_type Begin, End;
};
private:
/// \brief A range of cached tokens that should be erased after lexing
/// when backtracking requires the erasure of such cached tokens.
Optional<CachedTokensRange> CachedTokenRangeToErase;
public:
/// \brief Returns the range of cached tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
CachedTokensRange LastCachedTokenRange();
/// \brief Erase the range of cached tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
void EraseCachedTokens(CachedTokensRange TokenRange);
/// \brief Make Preprocessor re-lex the tokens that were lexed since
/// EnableBacktrackAtThisPos() was previously called.
void Backtrack();
/// \brief True if EnableBacktrackAtThisPos() was called and
/// caching of tokens is on.
bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
/// \brief Lex the next token for this preprocessor.
void Lex(Token &Result);
void LexAfterModuleImport(Token &Result);
void makeModuleVisible(Module *M, SourceLocation Loc);
SourceLocation getModuleImportLoc(Module *M) const {
return CurSubmoduleState->VisibleModules.getImportLoc(M);
}
/// \brief Lex a string literal, which may be the concatenation of multiple
/// string literals and may even come from macro expansion.
/// \returns true on success, false if an error diagnostic has been generated.
bool LexStringLiteral(Token &Result, std::string &String,
const char *DiagnosticTag, bool AllowMacroExpansion) {
if (AllowMacroExpansion)
Lex(Result);
else
LexUnexpandedToken(Result);
return FinishLexStringLiteral(Result, String, DiagnosticTag,
AllowMacroExpansion);
}
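/// Illustrative usage (a sketch, not part of the original header); the
/// diagnostic tag here is hypothetical.
/// \code
///   Token Tok;
///   std::string Str;
///   if (PP.LexStringLiteral(Tok, Str, "pragma message",
///                           /*AllowMacroExpansion=*/true)) {
///     // Str now holds the contents of the (possibly concatenated) literal.
///   }
/// \endcode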
/// \brief Complete the lexing of a string literal where the first token has
/// already been lexed (see LexStringLiteral).
bool FinishLexStringLiteral(Token &Result, std::string &String,
const char *DiagnosticTag,
bool AllowMacroExpansion);
/// \brief Lex a token. If it's a comment, keep lexing until we get
/// something not a comment.
///
/// This is useful in -E -C mode where comments would foul up preprocessor
/// directive handling.
void LexNonComment(Token &Result) {
do
Lex(Result);
while (Result.getKind() == tok::comment);
}
/// \brief Just like Lex, but disables macro expansion of identifier tokens.
void LexUnexpandedToken(Token &Result) {
// Disable macro expansion.
bool OldVal = DisableMacroExpansion;
DisableMacroExpansion = true;
// Lex the token.
Lex(Result);
// Reenable it.
DisableMacroExpansion = OldVal;
}
/// \brief Like LexNonComment, but this disables macro expansion of
/// identifier tokens.
void LexUnexpandedNonComment(Token &Result) {
do
LexUnexpandedToken(Result);
while (Result.getKind() == tok::comment);
}
/// \brief Parses a simple integer literal to get its numeric value. Floating
/// point literals and user defined literals are rejected. Used primarily to
/// handle pragmas that accept integer arguments.
bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
/// Disables macro expansion everywhere except for preprocessor directives.
void SetMacroExpansionOnlyInDirectives() {
DisableMacroExpansion = true;
MacroExpansionInDirectivesOverride = true;
}
/// \brief Peeks ahead N tokens and returns that token without consuming any
/// tokens.
///
/// LookAhead(0) returns the next token that would be returned by Lex(),
/// LookAhead(1) returns the token after it, etc. This returns normal
/// tokens after phase 5. As such, it is equivalent to using
/// 'Lex', not 'LexUnexpandedToken'.
const Token &LookAhead(unsigned N) {
if (CachedLexPos + N < CachedTokens.size())
return CachedTokens[CachedLexPos+N];
else
return PeekAhead(N+1);
}
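/// Illustrative usage (a sketch, not part of the original header): peek at
/// the next token without consuming anything.
/// \code
///   const Token &Next = PP.LookAhead(0);
///   if (Next.is(tok::comma)) {
///     // A comma follows; no token has been consumed yet.
///   }
/// \endcode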
/// \brief When backtracking is enabled and tokens are cached,
/// this allows reverting a specific number of tokens.
///
/// Note that the number of tokens being reverted should be up to the last
/// backtrack position, not more.
void RevertCachedTokens(unsigned N) {
assert(isBacktrackEnabled() &&
"Should only be called when tokens are cached for backtracking");
assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
&& "Should revert tokens up to the last backtrack position, not more");
assert(signed(CachedLexPos) - signed(N) >= 0 &&
"Corrupted backtrack positions ?");
CachedLexPos -= N;
}
/// \brief Enters a token in the token stream to be lexed next.
///
/// If BackTrack() is called afterwards, the token will remain at the
/// insertion point.
void EnterToken(const Token &Tok) {
EnterCachingLexMode();
CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
}
/// We notify the Preprocessor that if it is caching tokens (because
/// backtrack is enabled) it should replace the most recent cached tokens
/// with the given annotation token. This function has no effect if
/// backtracking is not enabled.
///
/// Note that the use of this function is just for optimization, so that the
/// cached tokens don't get re-parsed and re-resolved after a backtrack is
/// invoked.
void AnnotateCachedTokens(const Token &Tok) {
assert(Tok.isAnnotation() && "Expected annotation token");
if (CachedLexPos != 0 && isBacktrackEnabled())
AnnotatePreviousCachedTokens(Tok);
}
/// Get the location of the last cached token, suitable for setting the end
/// location of an annotation token.
SourceLocation getLastCachedTokenLocation() const {
assert(CachedLexPos != 0);
return CachedTokens[CachedLexPos-1].getLastLoc();
}
/// \brief Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
/// CachedTokens.
bool IsPreviousCachedToken(const Token &Tok) const;
/// \brief Replace the token at `CachedLexPos - 1` in CachedTokens by the
/// tokens in \p NewToks.
///
/// Useful when a token needs to be split into smaller ones and CachedTokens'
/// most recent token must be updated to reflect that.
void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
/// \brief Replace the last token with an annotation token.
///
/// Like AnnotateCachedTokens(), this routine replaces an
/// already-parsed (and resolved) token with an annotation
/// token. However, this routine only replaces the last token with
/// the annotation token; it does not affect any other cached
/// tokens. This function has no effect if backtracking is not
/// enabled.
void ReplaceLastTokenWithAnnotation(const Token &Tok) {
assert(Tok.isAnnotation() && "Expected annotation token");
if (CachedLexPos != 0 && isBacktrackEnabled())
CachedTokens[CachedLexPos-1] = Tok;
}
/// Enter an annotation token into the token stream.
void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
void *AnnotationVal);
/// Update the current token to represent the provided
/// identifier, in order to cache an action performed by typo correction.
void TypoCorrectToken(const Token &Tok) {
assert(Tok.getIdentifierInfo() && "Expected identifier token");
if (CachedLexPos != 0 && isBacktrackEnabled())
CachedTokens[CachedLexPos-1] = Tok;
}
/// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
/// CurTokenLexer pointers.
void recomputeCurLexerKind();
/// \brief Returns true if incremental processing is enabled.
bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
/// \brief Enable or disable incremental processing.
void enableIncrementalProcessing(bool value = true) {
IncrementalProcessing = value;
}
/// \brief Specify the point at which code-completion will be performed.
///
/// \param File the file in which code completion should occur. If
/// this file is included multiple times, completion will be performed the
/// first time it is included. If NULL, this function clears out the
/// code-completion point.
///
/// \param Line the line at which code completion should occur
/// (1-based).
///
/// \param Column the column at which code completion should occur
/// (1-based).
///
/// \returns true if an error occurred, false otherwise.
bool SetCodeCompletionPoint(const FileEntry *File,
unsigned Line, unsigned Column);
/// \brief Determine if we are performing code completion.
bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
/// \brief Returns the location of the code-completion point.
///
/// Returns an invalid location if code-completion is not enabled or the file
/// containing the code-completion point has not been lexed yet.
SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
/// \brief Returns the start location of the file of code-completion point.
///
/// Returns an invalid location if code-completion is not enabled or the file
/// containing the code-completion point has not been lexed yet.
SourceLocation getCodeCompletionFileLoc() const {
return CodeCompletionFileLoc;
}
/// \brief Returns true if code-completion is enabled and we have hit the
/// code-completion point.
bool isCodeCompletionReached() const { return CodeCompletionReached; }
/// \brief Note that we hit the code-completion point.
void setCodeCompletionReached() {
assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
CodeCompletionReached = true;
// Silence any diagnostics that occur after we hit the code-completion.
getDiagnostics().setSuppressAllDiagnostics(true);
}
/// \brief The location of the currently-active \#pragma clang
/// arc_cf_code_audited begin.
///
/// Returns an invalid location if there is no such pragma active.
SourceLocation getPragmaARCCFCodeAuditedLoc() const {
return PragmaARCCFCodeAuditedLoc;
}
/// \brief Set the location of the currently-active \#pragma clang
/// arc_cf_code_audited begin. An invalid location ends the pragma.
void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
PragmaARCCFCodeAuditedLoc = Loc;
}
/// \brief The location of the currently-active \#pragma clang
/// assume_nonnull begin.
///
/// Returns an invalid location if there is no such pragma active.
SourceLocation getPragmaAssumeNonNullLoc() const {
return PragmaAssumeNonNullLoc;
}
/// \brief Set the location of the currently-active \#pragma clang
/// assume_nonnull begin. An invalid location ends the pragma.
void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
PragmaAssumeNonNullLoc = Loc;
}
/// \brief Set the directory in which the main file should be considered
/// to have been found, if it is not a real file.
void setMainFileDir(const DirectoryEntry *Dir) {
MainFileDir = Dir;
}
/// \brief Instruct the preprocessor to skip part of the main source file.
///
/// \param Bytes The number of bytes in the preamble to skip.
///
/// \param StartOfLine Whether skipping these bytes puts the lexer at the
/// start of a line.
void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
SkipMainFilePreamble.first = Bytes;
SkipMainFilePreamble.second = StartOfLine;
}
/// Forwarding function for diagnostics. This emits a diagnostic at
/// the specified Token's location, translating the token's start
/// position in the current buffer into a SourcePosition object for rendering.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
return Diags->Report(Loc, DiagID);
}
DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
return Diags->Report(Tok.getLocation(), DiagID);
}
/// Return the 'spelling' of the token at the given
/// location; does not go up to the spelling location or down to the
/// expansion location.
///
/// \param buffer A buffer which will be used only if the token requires
/// "cleaning", e.g. if it contains trigraphs or escaped newlines
/// \param invalid If non-null, will be set \c true if an error occurs.
StringRef getSpelling(SourceLocation loc,
SmallVectorImpl<char> &buffer,
bool *invalid = nullptr) const {
return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
}
/// \brief Return the 'spelling' of the Tok token.
///
/// The spelling of a token is the characters used to represent the token in
/// the source file after trigraph expansion and escaped-newline folding. In
/// particular, this wants to get the true, uncanonicalized, spelling of
/// things like digraphs, UCNs, etc.
///
/// \param Invalid If non-null, will be set \c true if an error occurs.
std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
}
/// \brief Get the spelling of a token into a preallocated buffer, instead
/// of as an std::string.
///
/// The caller is required to allocate enough space for the token, which is
/// guaranteed to be at least Tok.getLength() bytes long. The length of the
/// actual result is returned.
///
/// Note that this method may do two possible things: it may either fill in
/// the buffer specified with characters, or it may *change the input pointer*
/// to point to a constant buffer with the data already in it (avoiding a
/// copy). The caller is not allowed to modify the returned buffer pointer
/// if an internal buffer is returned.
unsigned getSpelling(const Token &Tok, const char *&Buffer,
bool *Invalid = nullptr) const {
return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
}
/// \brief Get the spelling of a token into a SmallVector.
///
/// Note that the returned StringRef may not point to the
/// supplied buffer if a copy can be avoided.
StringRef getSpelling(const Token &Tok,
SmallVectorImpl<char> &Buffer,
bool *Invalid = nullptr) const;
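/// Illustrative usage (a sketch, not part of the original header):
/// \code
///   SmallString<64> Buffer;
///   StringRef Spelling = PP.getSpelling(Tok, Buffer);
///   // Spelling may point into Buffer or directly into the source buffer.
/// \endcode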
/// \brief Relex the token at the specified location.
/// \returns true if there was a failure, false on success.
bool getRawToken(SourceLocation Loc, Token &Result,
bool IgnoreWhiteSpace = false) {
return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
}
/// \brief Given a Token \p Tok that is a numeric constant with length 1,
/// return the character.
char
getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
bool *Invalid = nullptr) const {
assert(Tok.is(tok::numeric_constant) &&
Tok.getLength() == 1 && "Called on unsupported token");
assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
// If the token is carrying a literal data pointer, just use it.
if (const char *D = Tok.getLiteralData())
return *D;
// Otherwise, fall back on getCharacterData, which is slower, but always
// works.
return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
}
/// \brief Retrieve the name of the immediate macro expansion.
///
/// This routine starts from a source location, and finds the name of the
/// macro responsible for its immediate expansion. It looks through any
/// intervening macro argument expansions to compute this. It returns a
/// StringRef that refers to the SourceManager-owned buffer of the source
/// where that macro name is spelled. Thus, the result shouldn't out-live
/// the SourceManager.
StringRef getImmediateMacroName(SourceLocation Loc) {
return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
}
/// \brief Plop the specified string into a scratch buffer and set the
/// specified token's location and length to it.
///
/// If specified, the source location provides a location of the expansion
/// point of the token.
void CreateString(StringRef Str, Token &Tok,
SourceLocation ExpansionLocStart = SourceLocation(),
SourceLocation ExpansionLocEnd = SourceLocation());
/// \brief Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different from what it received. If
/// the returned source location would not be meaningful (e.g., if
/// it points into a macro), this routine returns an invalid
/// source location.
///
/// \param Offset an offset from the end of the token, where the source
/// location should refer to. The default offset (0) produces a source
/// location pointing just past the end of the token; an offset of 1 produces
/// a source location pointing to the last character in the token, etc.
SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
}
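/// Illustrative usage (a sketch, not part of the original header); DiagID is
/// a hypothetical diagnostic ID.
/// \code
///   SourceLocation After = PP.getLocForEndOfToken(Tok.getLocation());
///   if (After.isValid())
///     PP.Diag(After, DiagID); // point just past the offending token
/// \endcode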
/// \brief Returns true if the given MacroID location points at the first
/// token of the macro expansion.
///
/// \param MacroBegin If non-null and the function returns true, it is set to
/// the begin location of the macro.
bool isAtStartOfMacroExpansion(SourceLocation loc,
SourceLocation *MacroBegin = nullptr) const {
return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
MacroBegin);
}
/// \brief Returns true if the given MacroID location points at the last
/// token of the macro expansion.
///
/// \param MacroEnd If non-null and the function returns true, it is set to
/// the end location of the macro.
bool isAtEndOfMacroExpansion(SourceLocation loc,
SourceLocation *MacroEnd = nullptr) const {
return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
}
/// \brief Print the token to stderr, used for debugging.
void DumpToken(const Token &Tok, bool DumpFlags = false) const;
void DumpLocation(SourceLocation Loc) const;
void DumpMacro(const MacroInfo &MI) const;
void dumpMacroInfo(const IdentifierInfo *II);
/// \brief Given a location that specifies the start of a
/// token, return a new location that specifies a character within the token.
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
unsigned Char) const {
return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
}
/// \brief Increment the counters for the number of token paste operations
/// performed.
///
/// If fast was specified, this is a 'fast paste' case we handled.
void IncrementPasteCounter(bool isFast) {
if (isFast)
++NumFastTokenPaste;
else
++NumTokenPaste;
}
void PrintStats();
size_t getTotalMemory() const;
/// When the macro expander pastes together a comment (/##/) in Microsoft
/// mode, this method handles updating the current state, returning the
/// token on the next source line.
void HandleMicrosoftCommentPaste(Token &Tok);
//===--------------------------------------------------------------------===//
// Preprocessor callback methods. These are invoked by a lexer as various
// directives and events are found.
/// Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
private:
llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
public:
/// \brief Specifies the reason for poisoning an identifier.
///
/// If that identifier is accessed while poisoned, then this reason will be
/// used instead of the default "poisoned" diagnostic.
void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
/// \brief Display reason for poisoned identifier.
void HandlePoisonedIdentifier(Token & Tok);
void MaybeHandlePoisonedIdentifier(Token & Identifier) {
if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
if(II->isPoisoned()) {
HandlePoisonedIdentifier(Identifier);
}
}
}
private:
/// Identifiers used for SEH handling in Borland. These are only
/// allowed in particular circumstances.
// __except block
IdentifierInfo *Ident__exception_code,
*Ident___exception_code,
*Ident_GetExceptionCode;
// __except filter expression
IdentifierInfo *Ident__exception_info,
*Ident___exception_info,
*Ident_GetExceptionInfo;
// __finally
IdentifierInfo *Ident__abnormal_termination,
*Ident___abnormal_termination,
*Ident_AbnormalTermination;
const char *getCurLexerEndPos();
void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
public:
void PoisonSEHIdentifiers(bool Poison = true); // Borland
/// \brief Callback invoked when the lexer reads an identifier and has
/// filled in the token's IdentifierInfo member.
///
/// This callback potentially macro expands it or turns it into a named
/// token (like 'for').
///
/// \returns true if we actually computed a token, false if we need to
/// lex again.
bool HandleIdentifier(Token &Identifier);
/// \brief Callback invoked when the lexer hits the end of the current file.
///
/// This either returns the EOF token and returns true, or
/// pops a level off the include stack and returns false, at which point the
/// client should call lex again.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
/// \brief Callback invoked when the current TokenLexer hits the end of its
/// token stream.
bool HandleEndOfTokenLexer(Token &Result);
/// \brief Callback invoked when the lexer sees a # token at the start of a
/// line.
///
/// This consumes the directive, modifies the lexer/preprocessor state, and
/// advances the lexer(s) so that the next token read is the correct one.
void HandleDirective(Token &Result);
/// \brief Ensure that the next token is a tok::eod token.
///
/// If not, emit a diagnostic and consume up until the eod.
/// If \p EnableMacros is true, then we consider macros that expand to zero
/// tokens as being ok.
void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
/// \brief Read and discard all tokens remaining on the current line until
/// the tok::eod token is found.
void DiscardUntilEndOfDirective();
/// \brief Returns true if the preprocessor has seen a use of
/// __DATE__ or __TIME__ in the file so far.
bool SawDateOrTime() const {
return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
}
unsigned getCounterValue() const { return CounterValue; }
void setCounterValue(unsigned V) { CounterValue = V; }
/// \brief Retrieves the module that we're currently building, if any.
Module *getCurrentModule();
/// \brief Allocate a new MacroInfo object with the provided SourceLocation.
MacroInfo *AllocateMacroInfo(SourceLocation L);
/// \brief Turn the specified lexer token into a fully checked and spelled
/// filename, e.g. as an operand of \#include.
///
/// The caller is expected to provide a buffer that is large enough to hold
/// the spelling of the filename, but is also expected to handle the case
/// when this method decides to use a different buffer.
///
/// \returns true if the input filename was in <>'s or false if it was
/// in ""'s.
bool GetIncludeFilenameSpelling(SourceLocation Loc, StringRef &Filename);
/// \brief Given a "foo" or \<foo> reference, look up the indicated file.
///
/// Returns null on failure. \p isAngled indicates whether the file
/// reference is for system \#include's or not (i.e. using <> instead of "").
const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
bool isAngled, const DirectoryLookup *FromDir,
const FileEntry *FromFile,
const DirectoryLookup *&CurDir,
SmallVectorImpl<char> *SearchPath,
SmallVectorImpl<char> *RelativePath,
ModuleMap::KnownHeader *SuggestedModule,
bool *IsMapped, bool SkipCache = false);
/// \brief Get the DirectoryLookup structure used to find the current
/// FileEntry, if CurLexer is non-null and if applicable.
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
/// \brief Return true if we're in the top-level file, not in a \#include.
bool isInPrimaryFile() const;
/// \brief Handle cases where the \#include name is expanded
/// from a macro as multiple tokens, which need to be glued together.
///
/// This occurs for code like:
/// \code
/// \#define FOO <x/y.h>
/// \#include FOO
/// \endcode
/// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
///
/// This code concatenates and consumes tokens up to the '>' token. It
/// returns false if the '>' was found; otherwise it returns true if it finds
/// and consumes the EOD marker.
bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
SourceLocation &End);
/// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
/// followed by EOD. Return true if the token is not a valid on-off-switch.
bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
bool *ShadowFlag = nullptr);
void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
Module *LeaveSubmodule(bool ForPragma);
private:
void PushIncludeMacroStack() {
assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
std::move(CurLexer), std::move(CurPTHLexer),
CurPPLexer, std::move(CurTokenLexer),
CurDirLookup);
CurPPLexer = nullptr;
}
void PopIncludeMacroStack() {
CurLexer = std::move(IncludeMacroStack.back().TheLexer);
CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
CurPPLexer = IncludeMacroStack.back().ThePPLexer;
CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
CurDirLookup = IncludeMacroStack.back().TheDirLookup;
CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
CurLexerKind = IncludeMacroStack.back().CurLexerKind;
IncludeMacroStack.pop_back();
}
void PropagateLineStartLeadingSpaceInfo(Token &Result);
/// Determine whether we need to create module macros for #defines in the
/// current context.
bool needModuleMacros() const;
/// Update the set of active module macros and ambiguity flag for a module
/// macro name.
void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
SourceLocation Loc);
UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
bool isPublic);
/// \brief Lex and validate a macro name, which occurs after a
/// \#define or \#undef.
///
/// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
/// \param ShadowFlag Points to flag that is set if macro name shadows
/// a keyword.
///
/// This emits a diagnostic, sets the token kind to eod,
/// and discards the rest of the macro line if the macro name is invalid.
void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
bool *ShadowFlag = nullptr);
/// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
/// entire line) of the macro's tokens and adds them to MacroInfo, and while
/// doing so performs certain validity checks including (but not limited to):
/// - # (stringization) is followed by a macro parameter
/// \param MacroNameTok - Token that represents the macro name
/// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
///
/// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
/// returns a nullptr if an invalid sequence of tokens is encountered.
MacroInfo *ReadOptionalMacroParameterListAndBody(
const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
/// The ( starting an argument list of a macro definition has just been read.
/// Lex the rest of the parameters and the closing ), updating \p MI with
/// what we learn and saving in \p LastTok the last token read.
/// Return true if an error occurs parsing the arg list.
bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
/// We just read a \#if or related directive and decided that the
/// subsequent tokens are in the \#if'd out portion of the
/// file. Lex the rest of the file, until we see an \#endif. If \p
/// FoundNonSkipPortion is true, then we have already emitted code for part of
/// this \#if directive, so \#else/\#elif blocks should never be entered. If
/// \p FoundElse is false, then \#else directives are ok, if not, then we have
/// already seen one so a \#else directive is a duplicate. When this returns,
/// the caller can lex the first valid token.
void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
bool FoundNonSkipPortion, bool FoundElse,
SourceLocation ElseLoc = SourceLocation());
/// \brief A fast PTH version of SkipExcludedConditionalBlock.
void PTHSkipExcludedConditionalBlock();
/// Information about the result for evaluating an expression for a
/// preprocessor directive.
struct DirectiveEvalResult {
/// Whether the expression was evaluated as true or not.
bool Conditional;
/// True if the expression contained identifiers that were undefined.
bool IncludedUndefinedIds;
};
/// \brief Evaluate an integer constant expression that may occur after a
/// \#if or \#elif directive and return a \p DirectiveEvalResult object.
///
/// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
/// \brief Install the standard preprocessor pragmas:
/// \#pragma GCC poison/system_header/dependency and \#pragma once.
void RegisterBuiltinPragmas();
/// \brief Register builtin macros such as __LINE__ with the identifier table.
void RegisterBuiltinMacros();
/// If an identifier token is read that is to be expanded as a macro, handle
/// it and return the next token as 'Tok'. If we lexed a token, return true;
/// otherwise the caller should lex again.
bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD);
/// \brief Cache macro-expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro-expanded tokens that it
/// is going to lex into the cache, and when it finishes, the tokens are
/// removed from the end of the cache.
Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
ArrayRef<Token> tokens);
void removeCachedMacroExpandedTokensOfLastLexer();
friend void TokenLexer::ExpandFunctionArguments();
/// Determine whether the next preprocessor token to be
/// lexed is a '('. If so, consume the token and return true, if not, this
/// method should have no observable side-effect on the lexed tokens.
bool isNextPPTokenLParen();
/// After reading "MACRO(", this method is invoked to read all of the formal
/// arguments specified for the macro invocation. Returns null on error.
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
SourceLocation &ExpansionEnd);
/// \brief If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void ExpandBuiltinMacro(Token &Tok);
/// \brief Read a \c _Pragma directive, slice it up, process it, then
/// return the first token after the directive.
/// This assumes that the \c _Pragma token has just been read into \p Tok.
void Handle_Pragma(Token &Tok);
/// \brief Like Handle_Pragma except the pragma text is not enclosed within
/// a string literal.
void HandleMicrosoft__pragma(Token &Tok);
/// \brief Add a lexer to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
/// \brief Add a lexer to the top of the include stack and
/// start getting tokens from it using the PTH cache.
void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
/// \brief Set the FileID for the preprocessor predefines.
void setPredefinesFileID(FileID FID) {
assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
PredefinesFileID = FID;
}
/// \brief Returns true if we are lexing from a file and not a
/// pragma or a macro.
static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
return L ? !L->isPragmaLexer() : P != nullptr;
}
static bool IsFileLexer(const IncludeStackInfo& I) {
return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
}
bool IsFileLexer() const {
return IsFileLexer(CurLexer.get(), CurPPLexer);
}
//===--------------------------------------------------------------------===//
// Caching stuff.
void CachingLex(Token &Result);
bool InCachingLexMode() const {
// If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
// that we are past EOF, not that we are in CachingLex mode.
return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
!IncludeMacroStack.empty();
}
void EnterCachingLexMode();
void ExitCachingLexMode() {
if (InCachingLexMode())
RemoveTopOfLexerStack();
}
const Token &PeekAhead(unsigned N);
void AnnotatePreviousCachedTokens(const Token &Tok);
//===--------------------------------------------------------------------===//
/// Handle*Directive - implement the various preprocessor directives. These
/// should side-effect the current preprocessor object so that the next call
/// to Lex() will return the appropriate token next.
void HandleLineDirective();
void HandleDigitDirective(Token &Tok);
void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
void HandleIdentSCCSDirective(Token &Tok);
void HandleMacroPublicDirective(Token &Tok);
void HandleMacroPrivateDirective();
// File inclusion.
void HandleIncludeDirective(SourceLocation HashLoc,
Token &Tok,
const DirectoryLookup *LookupFrom = nullptr,
const FileEntry *LookupFromFile = nullptr,
bool isImport = false);
void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
void HandleMicrosoftImportDirective(Token &Tok);
public:
/// Check that the given module is available, producing a diagnostic if not.
/// \return \c true if the check failed (because the module is not available).
/// \c false if the module appears to be usable.
static bool checkModuleIsAvailable(const LangOptions &LangOpts,
const TargetInfo &TargetInfo,
DiagnosticsEngine &Diags, Module *M);
// Module inclusion testing.
/// \brief Find the module that owns the source or header file that
/// \p Loc points to. If the location is in a file that was included
/// into a module, or is outside any module, returns nullptr.
Module *getModuleForLocation(SourceLocation Loc);
/// \brief We want to produce a diagnostic at location IncLoc concerning a
/// missing module import.
///
/// \param IncLoc The location at which the missing import was detected.
/// \param M The desired module.
/// \param MLoc A location within the desired module at which some desired
/// effect occurred (eg, where a desired entity was declared).
///
/// \return A file that can be #included to import a module containing MLoc.
/// Null if no such file could be determined or if a #include is not
/// appropriate.
const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
Module *M,
SourceLocation MLoc);
bool isRecordingPreamble() const {
return PreambleConditionalStack.isRecording();
}
bool hasRecordedPreamble() const {
return PreambleConditionalStack.hasRecordedPreamble();
}
ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
return PreambleConditionalStack.getStack();
}
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
PreambleConditionalStack.setStack(s);
}
void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
PreambleConditionalStack.startReplaying();
PreambleConditionalStack.setStack(s);
}
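// Illustrative sketch (hypothetical client code) of the record/replay API
// above: a precompiled-preamble builder saves the #if regions still open at
// the end of the preamble and hands them to the preprocessor that resumes
// lexing after it.
//
//   ArrayRef<PPConditionalInfo> Saved;
//   if (BuildingPP.hasRecordedPreamble())
//     Saved = BuildingPP.getPreambleConditionalStack();
//   ReusingPP.setReplayablePreambleConditionalStack(Saved);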
private:
+ /// \brief After processing the predefines file, initialize the conditional
+ /// stack from the preamble.
+ void replayPreambleConditionalStack();
+
// Macro handling.
void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
void HandleUndefDirective();
// Conditional Inclusion.
void HandleIfdefDirective(Token &Tok, bool isIfndef,
bool ReadAnyTokensBeforeDirective);
void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
void HandleEndifDirective(Token &Tok);
void HandleElseDirective(Token &Tok);
void HandleElifDirective(Token &Tok);
// Pragmas.
void HandlePragmaDirective(SourceLocation IntroducerLoc,
PragmaIntroducerKind Introducer);
public:
void HandlePragmaOnce(Token &OnceTok);
void HandlePragmaMark();
void HandlePragmaPoison();
void HandlePragmaSystemHeader(Token &SysHeaderTok);
void HandlePragmaDependency(Token &DependencyTok);
void HandlePragmaPushMacro(Token &Tok);
void HandlePragmaPopMacro(Token &Tok);
void HandlePragmaIncludeAlias(Token &Tok);
void HandlePragmaModuleBuild(Token &Tok);
IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
// Return true and store the first token only if any CommentHandler
// has inserted some tokens and getCommentRetentionState() is false.
bool HandleComment(Token &Token, SourceRange Comment);
/// \brief A macro is used, update information about macros that need unused
/// warnings.
void markMacroAsUsed(MacroInfo *MI);
};
/// \brief Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
virtual ~CommentHandler();
// The handler shall return true if it has pushed any tokens
// to be read using e.g. EnterToken or EnterTokenStream.
virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
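// Illustrative sketch, not part of this header: a minimal CommentHandler that
// merely observes comments. It returns false because it never pushes tokens
// back into the preprocessor; a real client would register it with the
// Preprocessor's addCommentHandler().
//
//   class LoggingCommentHandler : public CommentHandler {
//   public:
//     bool HandleComment(Preprocessor &PP, SourceRange Comment) override {
//       // Inspect Comment's source range here. Returning false means "no
//       // tokens were inserted", so lexing continues normally.
//       return false;
//     }
//   };
//   LoggingCommentHandler Handler;
//   PP.addCommentHandler(&Handler);  // PP is a hypothetical Preprocessor &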
/// \brief Registry of pragma handlers added by plugins
typedef llvm::Registry<PragmaHandler> PragmaHandlerRegistry;
} // end namespace clang
#endif
Index: head/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp (revision 322855)
@@ -1,6249 +1,6254 @@
//===--- ASTImporter.cpp - Importing ASTs from other Contexts ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ASTImporter class which imports AST nodes from one
// context into another context.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTImporter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTDiagnostic.h"
#include "clang/AST/ASTStructuralEquivalence.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeVisitor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/MemoryBuffer.h"
#include <deque>
namespace clang {
class ASTNodeImporter : public TypeVisitor<ASTNodeImporter, QualType>,
public DeclVisitor<ASTNodeImporter, Decl *>,
public StmtVisitor<ASTNodeImporter, Stmt *> {
ASTImporter &Importer;
public:
explicit ASTNodeImporter(ASTImporter &Importer) : Importer(Importer) { }
using TypeVisitor<ASTNodeImporter, QualType>::Visit;
using DeclVisitor<ASTNodeImporter, Decl *>::Visit;
using StmtVisitor<ASTNodeImporter, Stmt *>::Visit;
// Importing types
QualType VisitType(const Type *T);
QualType VisitAtomicType(const AtomicType *T);
QualType VisitBuiltinType(const BuiltinType *T);
QualType VisitDecayedType(const DecayedType *T);
QualType VisitComplexType(const ComplexType *T);
QualType VisitPointerType(const PointerType *T);
QualType VisitBlockPointerType(const BlockPointerType *T);
QualType VisitLValueReferenceType(const LValueReferenceType *T);
QualType VisitRValueReferenceType(const RValueReferenceType *T);
QualType VisitMemberPointerType(const MemberPointerType *T);
QualType VisitConstantArrayType(const ConstantArrayType *T);
QualType VisitIncompleteArrayType(const IncompleteArrayType *T);
QualType VisitVariableArrayType(const VariableArrayType *T);
// FIXME: DependentSizedArrayType
// FIXME: DependentSizedExtVectorType
QualType VisitVectorType(const VectorType *T);
QualType VisitExtVectorType(const ExtVectorType *T);
QualType VisitFunctionNoProtoType(const FunctionNoProtoType *T);
QualType VisitFunctionProtoType(const FunctionProtoType *T);
// FIXME: UnresolvedUsingType
QualType VisitParenType(const ParenType *T);
QualType VisitTypedefType(const TypedefType *T);
QualType VisitTypeOfExprType(const TypeOfExprType *T);
// FIXME: DependentTypeOfExprType
QualType VisitTypeOfType(const TypeOfType *T);
QualType VisitDecltypeType(const DecltypeType *T);
QualType VisitUnaryTransformType(const UnaryTransformType *T);
QualType VisitAutoType(const AutoType *T);
QualType VisitInjectedClassNameType(const InjectedClassNameType *T);
// FIXME: DependentDecltypeType
QualType VisitRecordType(const RecordType *T);
QualType VisitEnumType(const EnumType *T);
QualType VisitAttributedType(const AttributedType *T);
QualType VisitTemplateTypeParmType(const TemplateTypeParmType *T);
QualType VisitSubstTemplateTypeParmType(const SubstTemplateTypeParmType *T);
QualType VisitTemplateSpecializationType(const TemplateSpecializationType *T);
QualType VisitElaboratedType(const ElaboratedType *T);
// FIXME: DependentNameType
// FIXME: DependentTemplateSpecializationType
QualType VisitObjCInterfaceType(const ObjCInterfaceType *T);
QualType VisitObjCObjectType(const ObjCObjectType *T);
QualType VisitObjCObjectPointerType(const ObjCObjectPointerType *T);
// Importing declarations
bool ImportDeclParts(NamedDecl *D, DeclContext *&DC,
DeclContext *&LexicalDC, DeclarationName &Name,
NamedDecl *&ToD, SourceLocation &Loc);
void ImportDefinitionIfNeeded(Decl *FromD, Decl *ToD = nullptr);
void ImportDeclarationNameLoc(const DeclarationNameInfo &From,
DeclarationNameInfo& To);
void ImportDeclContext(DeclContext *FromDC, bool ForceImport = false);
bool ImportCastPath(CastExpr *E, CXXCastPath &Path);
typedef DesignatedInitExpr::Designator Designator;
Designator ImportDesignator(const Designator &D);
/// \brief What we should import from the definition.
enum ImportDefinitionKind {
/// \brief Import the default subset of the definition, which might be
/// nothing (if minimal import is set) or might be everything (if minimal
/// import is not set).
IDK_Default,
/// \brief Import everything.
IDK_Everything,
/// \brief Import only the bare bones needed to establish a valid
/// DeclContext.
IDK_Basic
};
bool shouldForceImportDeclContext(ImportDefinitionKind IDK) {
return IDK == IDK_Everything ||
(IDK == IDK_Default && !Importer.isMinimalImport());
}
bool ImportDefinition(RecordDecl *From, RecordDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(VarDecl *From, VarDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(EnumDecl *From, EnumDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(ObjCInterfaceDecl *From, ObjCInterfaceDecl *To,
ImportDefinitionKind Kind = IDK_Default);
bool ImportDefinition(ObjCProtocolDecl *From, ObjCProtocolDecl *To,
ImportDefinitionKind Kind = IDK_Default);
TemplateParameterList *ImportTemplateParameterList(
TemplateParameterList *Params);
TemplateArgument ImportTemplateArgument(const TemplateArgument &From);
TemplateArgumentLoc ImportTemplateArgumentLoc(
const TemplateArgumentLoc &TALoc, bool &Error);
bool ImportTemplateArguments(const TemplateArgument *FromArgs,
unsigned NumFromArgs,
SmallVectorImpl<TemplateArgument> &ToArgs);
bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord,
bool Complain = true);
bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
bool Complain = true);
bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord);
bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC);
bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);
bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);
Decl *VisitDecl(Decl *D);
Decl *VisitAccessSpecDecl(AccessSpecDecl *D);
Decl *VisitStaticAssertDecl(StaticAssertDecl *D);
Decl *VisitTranslationUnitDecl(TranslationUnitDecl *D);
Decl *VisitNamespaceDecl(NamespaceDecl *D);
Decl *VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias);
Decl *VisitTypedefDecl(TypedefDecl *D);
Decl *VisitTypeAliasDecl(TypeAliasDecl *D);
Decl *VisitLabelDecl(LabelDecl *D);
Decl *VisitEnumDecl(EnumDecl *D);
Decl *VisitRecordDecl(RecordDecl *D);
Decl *VisitEnumConstantDecl(EnumConstantDecl *D);
Decl *VisitFunctionDecl(FunctionDecl *D);
Decl *VisitCXXMethodDecl(CXXMethodDecl *D);
Decl *VisitCXXConstructorDecl(CXXConstructorDecl *D);
Decl *VisitCXXDestructorDecl(CXXDestructorDecl *D);
Decl *VisitCXXConversionDecl(CXXConversionDecl *D);
Decl *VisitFieldDecl(FieldDecl *D);
Decl *VisitIndirectFieldDecl(IndirectFieldDecl *D);
Decl *VisitFriendDecl(FriendDecl *D);
Decl *VisitObjCIvarDecl(ObjCIvarDecl *D);
Decl *VisitVarDecl(VarDecl *D);
Decl *VisitImplicitParamDecl(ImplicitParamDecl *D);
Decl *VisitParmVarDecl(ParmVarDecl *D);
Decl *VisitObjCMethodDecl(ObjCMethodDecl *D);
Decl *VisitObjCTypeParamDecl(ObjCTypeParamDecl *D);
Decl *VisitObjCCategoryDecl(ObjCCategoryDecl *D);
Decl *VisitObjCProtocolDecl(ObjCProtocolDecl *D);
Decl *VisitLinkageSpecDecl(LinkageSpecDecl *D);
ObjCTypeParamList *ImportObjCTypeParamList(ObjCTypeParamList *list);
Decl *VisitObjCInterfaceDecl(ObjCInterfaceDecl *D);
Decl *VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D);
Decl *VisitObjCImplementationDecl(ObjCImplementationDecl *D);
Decl *VisitObjCPropertyDecl(ObjCPropertyDecl *D);
Decl *VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
Decl *VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D);
Decl *VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D);
Decl *VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D);
Decl *VisitClassTemplateDecl(ClassTemplateDecl *D);
Decl *VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D);
Decl *VisitVarTemplateDecl(VarTemplateDecl *D);
Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D);
// Importing statements
DeclGroupRef ImportDeclGroup(DeclGroupRef DG);
Stmt *VisitStmt(Stmt *S);
Stmt *VisitGCCAsmStmt(GCCAsmStmt *S);
Stmt *VisitDeclStmt(DeclStmt *S);
Stmt *VisitNullStmt(NullStmt *S);
Stmt *VisitCompoundStmt(CompoundStmt *S);
Stmt *VisitCaseStmt(CaseStmt *S);
Stmt *VisitDefaultStmt(DefaultStmt *S);
Stmt *VisitLabelStmt(LabelStmt *S);
Stmt *VisitAttributedStmt(AttributedStmt *S);
Stmt *VisitIfStmt(IfStmt *S);
Stmt *VisitSwitchStmt(SwitchStmt *S);
Stmt *VisitWhileStmt(WhileStmt *S);
Stmt *VisitDoStmt(DoStmt *S);
Stmt *VisitForStmt(ForStmt *S);
Stmt *VisitGotoStmt(GotoStmt *S);
Stmt *VisitIndirectGotoStmt(IndirectGotoStmt *S);
Stmt *VisitContinueStmt(ContinueStmt *S);
Stmt *VisitBreakStmt(BreakStmt *S);
Stmt *VisitReturnStmt(ReturnStmt *S);
// FIXME: MSAsmStmt
// FIXME: SEHExceptStmt
// FIXME: SEHFinallyStmt
// FIXME: SEHTryStmt
// FIXME: SEHLeaveStmt
// FIXME: CapturedStmt
Stmt *VisitCXXCatchStmt(CXXCatchStmt *S);
Stmt *VisitCXXTryStmt(CXXTryStmt *S);
Stmt *VisitCXXForRangeStmt(CXXForRangeStmt *S);
// FIXME: MSDependentExistsStmt
Stmt *VisitObjCForCollectionStmt(ObjCForCollectionStmt *S);
Stmt *VisitObjCAtCatchStmt(ObjCAtCatchStmt *S);
Stmt *VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S);
Stmt *VisitObjCAtTryStmt(ObjCAtTryStmt *S);
Stmt *VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S);
Stmt *VisitObjCAtThrowStmt(ObjCAtThrowStmt *S);
Stmt *VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S);
// Importing expressions
Expr *VisitExpr(Expr *E);
Expr *VisitVAArgExpr(VAArgExpr *E);
Expr *VisitGNUNullExpr(GNUNullExpr *E);
Expr *VisitPredefinedExpr(PredefinedExpr *E);
Expr *VisitDeclRefExpr(DeclRefExpr *E);
Expr *VisitImplicitValueInitExpr(ImplicitValueInitExpr *ILE);
Expr *VisitDesignatedInitExpr(DesignatedInitExpr *E);
Expr *VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E);
Expr *VisitIntegerLiteral(IntegerLiteral *E);
Expr *VisitFloatingLiteral(FloatingLiteral *E);
Expr *VisitCharacterLiteral(CharacterLiteral *E);
Expr *VisitStringLiteral(StringLiteral *E);
Expr *VisitCompoundLiteralExpr(CompoundLiteralExpr *E);
Expr *VisitAtomicExpr(AtomicExpr *E);
Expr *VisitAddrLabelExpr(AddrLabelExpr *E);
Expr *VisitParenExpr(ParenExpr *E);
Expr *VisitParenListExpr(ParenListExpr *E);
Expr *VisitStmtExpr(StmtExpr *E);
Expr *VisitUnaryOperator(UnaryOperator *E);
Expr *VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E);
Expr *VisitBinaryOperator(BinaryOperator *E);
Expr *VisitConditionalOperator(ConditionalOperator *E);
Expr *VisitBinaryConditionalOperator(BinaryConditionalOperator *E);
Expr *VisitOpaqueValueExpr(OpaqueValueExpr *E);
Expr *VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E);
Expr *VisitExpressionTraitExpr(ExpressionTraitExpr *E);
Expr *VisitArraySubscriptExpr(ArraySubscriptExpr *E);
Expr *VisitCompoundAssignOperator(CompoundAssignOperator *E);
Expr *VisitImplicitCastExpr(ImplicitCastExpr *E);
Expr *VisitExplicitCastExpr(ExplicitCastExpr *E);
Expr *VisitOffsetOfExpr(OffsetOfExpr *OE);
Expr *VisitCXXThrowExpr(CXXThrowExpr *E);
Expr *VisitCXXNoexceptExpr(CXXNoexceptExpr *E);
Expr *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E);
Expr *VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E);
Expr *VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E);
Expr *VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *CE);
Expr *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E);
Expr *VisitCXXNewExpr(CXXNewExpr *CE);
Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E);
Expr *VisitCXXConstructExpr(CXXConstructExpr *E);
Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E);
Expr *VisitExprWithCleanups(ExprWithCleanups *EWC);
Expr *VisitCXXThisExpr(CXXThisExpr *E);
Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E);
Expr *VisitMemberExpr(MemberExpr *E);
Expr *VisitCallExpr(CallExpr *E);
Expr *VisitInitListExpr(InitListExpr *E);
Expr *VisitArrayInitLoopExpr(ArrayInitLoopExpr *E);
Expr *VisitArrayInitIndexExpr(ArrayInitIndexExpr *E);
Expr *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E);
Expr *VisitCXXNamedCastExpr(CXXNamedCastExpr *E);
Expr *VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E);
template<typename IIter, typename OIter>
void ImportArray(IIter Ibegin, IIter Iend, OIter Obegin) {
typedef typename std::remove_reference<decltype(*Obegin)>::type ItemT;
ASTImporter &ImporterRef = Importer;
std::transform(Ibegin, Iend, Obegin,
[&ImporterRef](ItemT From) -> ItemT {
return ImporterRef.Import(From);
});
}
template<typename IIter, typename OIter>
bool ImportArrayChecked(IIter Ibegin, IIter Iend, OIter Obegin) {
typedef typename std::remove_reference<decltype(**Obegin)>::type ItemT;
ASTImporter &ImporterRef = Importer;
bool Failed = false;
std::transform(Ibegin, Iend, Obegin,
[&ImporterRef, &Failed](ItemT *From) -> ItemT * {
ItemT *To = cast_or_null<ItemT>(
ImporterRef.Import(From));
if (!To && From)
Failed = true;
return To;
});
return Failed;
}
template<typename InContainerTy, typename OutContainerTy>
bool ImportContainerChecked(const InContainerTy &InContainer,
OutContainerTy &OutContainer) {
return ImportArrayChecked(InContainer.begin(), InContainer.end(),
OutContainer.begin());
}
template<typename InContainerTy, typename OIter>
bool ImportArrayChecked(const InContainerTy &InContainer, OIter Obegin) {
return ImportArrayChecked(InContainer.begin(), InContainer.end(), Obegin);
}
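// Illustrative usage sketch (hypothetical caller) of ImportArrayChecked: the
// Visit*Decl methods use it to bulk-import lists such as function parameters,
// bailing out if any single element fails to import.
//
//   SmallVector<ParmVarDecl *, 8> ToParams(FromFunction->getNumParams());
//   if (ImportArrayChecked(FromFunction->parameters(), ToParams.begin()))
//     return nullptr;  // some parameter could not be imported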
// Importing overrides.
void ImportOverrides(CXXMethodDecl *ToMethod, CXXMethodDecl *FromMethod);
};
} // end namespace clang
//----------------------------------------------------------------------------
// Import Types
//----------------------------------------------------------------------------
using namespace clang;
QualType ASTNodeImporter::VisitType(const Type *T) {
Importer.FromDiag(SourceLocation(), diag::err_unsupported_ast_node)
<< T->getTypeClassName();
return QualType();
}
QualType ASTNodeImporter::VisitAtomicType(const AtomicType *T) {
QualType UnderlyingType = Importer.Import(T->getValueType());
if (UnderlyingType.isNull())
return QualType();
return Importer.getToContext().getAtomicType(UnderlyingType);
}
QualType ASTNodeImporter::VisitBuiltinType(const BuiltinType *T) {
switch (T->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
return Importer.getToContext().SingletonId;
#include "clang/Basic/OpenCLImageTypes.def"
#define SHARED_SINGLETON_TYPE(Expansion)
#define BUILTIN_TYPE(Id, SingletonId) \
case BuiltinType::Id: return Importer.getToContext().SingletonId;
#include "clang/AST/BuiltinTypes.def"
// FIXME: for Char16, Char32, and NullPtr, make sure that the "to"
// context supports C++.
// FIXME: for ObjCId, ObjCClass, and ObjCSel, make sure that the "to"
// context supports ObjC.
case BuiltinType::Char_U:
// The context we're importing from has an unsigned 'char'. If we're
// importing into a context with a signed 'char', translate to
// 'unsigned char' instead.
if (Importer.getToContext().getLangOpts().CharIsSigned)
return Importer.getToContext().UnsignedCharTy;
return Importer.getToContext().CharTy;
case BuiltinType::Char_S:
// The context we're importing from has a signed 'char'. If we're
// importing into a context with an unsigned 'char', translate to
// 'signed char' instead.
if (!Importer.getToContext().getLangOpts().CharIsSigned)
return Importer.getToContext().SignedCharTy;
return Importer.getToContext().CharTy;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
// FIXME: If not in C++, shall we translate to the C equivalent of
// wchar_t?
return Importer.getToContext().WCharTy;
}
llvm_unreachable("Invalid BuiltinType Kind!");
}
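// Illustrative sketch (hypothetical helper, not part of ASTImporter) of the
// plain-'char' translation in the two cases above: when the two contexts
// disagree about the signedness of plain 'char', switch to an explicitly
// signed or unsigned char type so the original signedness is preserved.
//
//   QualType importPlainChar(ASTContext &To, bool FromCharIsSigned) {
//     bool ToCharIsSigned = To.getLangOpts().CharIsSigned;
//     if (FromCharIsSigned == ToCharIsSigned)
//       return To.CharTy;  // same signedness: keep plain 'char'
//     return FromCharIsSigned ? To.SignedCharTy : To.UnsignedCharTy;
//   }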
QualType ASTNodeImporter::VisitDecayedType(const DecayedType *T) {
QualType OrigT = Importer.Import(T->getOriginalType());
if (OrigT.isNull())
return QualType();
return Importer.getToContext().getDecayedType(OrigT);
}
QualType ASTNodeImporter::VisitComplexType(const ComplexType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getComplexType(ToElementType);
}
QualType ASTNodeImporter::VisitPointerType(const PointerType *T) {
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getPointerType(ToPointeeType);
}
QualType ASTNodeImporter::VisitBlockPointerType(const BlockPointerType *T) {
// FIXME: Check for blocks support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getBlockPointerType(ToPointeeType);
}
QualType
ASTNodeImporter::VisitLValueReferenceType(const LValueReferenceType *T) {
// FIXME: Check for C++ support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeTypeAsWritten());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getLValueReferenceType(ToPointeeType);
}
QualType
ASTNodeImporter::VisitRValueReferenceType(const RValueReferenceType *T) {
// FIXME: Check for C++0x support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeTypeAsWritten());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getRValueReferenceType(ToPointeeType);
}
QualType ASTNodeImporter::VisitMemberPointerType(const MemberPointerType *T) {
// FIXME: Check for C++ support in "to" context.
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
QualType ClassType = Importer.Import(QualType(T->getClass(), 0));
return Importer.getToContext().getMemberPointerType(ToPointeeType,
ClassType.getTypePtr());
}
QualType ASTNodeImporter::VisitConstantArrayType(const ConstantArrayType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getConstantArrayType(ToElementType,
T->getSize(),
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers());
}
QualType
ASTNodeImporter::VisitIncompleteArrayType(const IncompleteArrayType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getIncompleteArrayType(ToElementType,
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers());
}
QualType ASTNodeImporter::VisitVariableArrayType(const VariableArrayType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
Expr *Size = Importer.Import(T->getSizeExpr());
if (!Size)
return QualType();
SourceRange Brackets = Importer.Import(T->getBracketsRange());
return Importer.getToContext().getVariableArrayType(ToElementType, Size,
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers(),
Brackets);
}
QualType ASTNodeImporter::VisitVectorType(const VectorType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getVectorType(ToElementType,
T->getNumElements(),
T->getVectorKind());
}
QualType ASTNodeImporter::VisitExtVectorType(const ExtVectorType *T) {
QualType ToElementType = Importer.Import(T->getElementType());
if (ToElementType.isNull())
return QualType();
return Importer.getToContext().getExtVectorType(ToElementType,
T->getNumElements());
}
QualType
ASTNodeImporter::VisitFunctionNoProtoType(const FunctionNoProtoType *T) {
// FIXME: What happens if we're importing a function without a prototype
// into C++? Should we make it variadic?
QualType ToResultType = Importer.Import(T->getReturnType());
if (ToResultType.isNull())
return QualType();
return Importer.getToContext().getFunctionNoProtoType(ToResultType,
T->getExtInfo());
}
QualType ASTNodeImporter::VisitFunctionProtoType(const FunctionProtoType *T) {
QualType ToResultType = Importer.Import(T->getReturnType());
if (ToResultType.isNull())
return QualType();
// Import argument types
SmallVector<QualType, 4> ArgTypes;
for (const auto &A : T->param_types()) {
QualType ArgType = Importer.Import(A);
if (ArgType.isNull())
return QualType();
ArgTypes.push_back(ArgType);
}
// Import exception types
SmallVector<QualType, 4> ExceptionTypes;
for (const auto &E : T->exceptions()) {
QualType ExceptionType = Importer.Import(E);
if (ExceptionType.isNull())
return QualType();
ExceptionTypes.push_back(ExceptionType);
}
FunctionProtoType::ExtProtoInfo FromEPI = T->getExtProtoInfo();
FunctionProtoType::ExtProtoInfo ToEPI;
ToEPI.ExtInfo = FromEPI.ExtInfo;
ToEPI.Variadic = FromEPI.Variadic;
ToEPI.HasTrailingReturn = FromEPI.HasTrailingReturn;
ToEPI.TypeQuals = FromEPI.TypeQuals;
ToEPI.RefQualifier = FromEPI.RefQualifier;
ToEPI.ExceptionSpec.Type = FromEPI.ExceptionSpec.Type;
ToEPI.ExceptionSpec.Exceptions = ExceptionTypes;
ToEPI.ExceptionSpec.NoexceptExpr =
Importer.Import(FromEPI.ExceptionSpec.NoexceptExpr);
ToEPI.ExceptionSpec.SourceDecl = cast_or_null<FunctionDecl>(
Importer.Import(FromEPI.ExceptionSpec.SourceDecl));
ToEPI.ExceptionSpec.SourceTemplate = cast_or_null<FunctionDecl>(
Importer.Import(FromEPI.ExceptionSpec.SourceTemplate));
return Importer.getToContext().getFunctionType(ToResultType, ArgTypes, ToEPI);
}
QualType ASTNodeImporter::VisitParenType(const ParenType *T) {
QualType ToInnerType = Importer.Import(T->getInnerType());
if (ToInnerType.isNull())
return QualType();
return Importer.getToContext().getParenType(ToInnerType);
}
QualType ASTNodeImporter::VisitTypedefType(const TypedefType *T) {
TypedefNameDecl *ToDecl
= dyn_cast_or_null<TypedefNameDecl>(Importer.Import(T->getDecl()));
if (!ToDecl)
return QualType();
return Importer.getToContext().getTypeDeclType(ToDecl);
}
QualType ASTNodeImporter::VisitTypeOfExprType(const TypeOfExprType *T) {
Expr *ToExpr = Importer.Import(T->getUnderlyingExpr());
if (!ToExpr)
return QualType();
return Importer.getToContext().getTypeOfExprType(ToExpr);
}
QualType ASTNodeImporter::VisitTypeOfType(const TypeOfType *T) {
QualType ToUnderlyingType = Importer.Import(T->getUnderlyingType());
if (ToUnderlyingType.isNull())
return QualType();
return Importer.getToContext().getTypeOfType(ToUnderlyingType);
}
QualType ASTNodeImporter::VisitDecltypeType(const DecltypeType *T) {
// FIXME: Make sure that the "to" context supports C++0x!
Expr *ToExpr = Importer.Import(T->getUnderlyingExpr());
if (!ToExpr)
return QualType();
QualType UnderlyingType = Importer.Import(T->getUnderlyingType());
if (UnderlyingType.isNull())
return QualType();
return Importer.getToContext().getDecltypeType(ToExpr, UnderlyingType);
}
QualType ASTNodeImporter::VisitUnaryTransformType(const UnaryTransformType *T) {
QualType ToBaseType = Importer.Import(T->getBaseType());
QualType ToUnderlyingType = Importer.Import(T->getUnderlyingType());
if (ToBaseType.isNull() || ToUnderlyingType.isNull())
return QualType();
return Importer.getToContext().getUnaryTransformType(ToBaseType,
ToUnderlyingType,
T->getUTTKind());
}
QualType ASTNodeImporter::VisitAutoType(const AutoType *T) {
// FIXME: Make sure that the "to" context supports C++11!
QualType FromDeduced = T->getDeducedType();
QualType ToDeduced;
if (!FromDeduced.isNull()) {
ToDeduced = Importer.Import(FromDeduced);
if (ToDeduced.isNull())
return QualType();
}
return Importer.getToContext().getAutoType(ToDeduced, T->getKeyword(),
/*IsDependent*/false);
}
QualType ASTNodeImporter::VisitInjectedClassNameType(
const InjectedClassNameType *T) {
CXXRecordDecl *D = cast_or_null<CXXRecordDecl>(Importer.Import(T->getDecl()));
if (!D)
return QualType();
QualType InjType = Importer.Import(T->getInjectedSpecializationType());
if (InjType.isNull())
return QualType();
// FIXME: ASTContext::getInjectedClassNameType is not suitable for AST reading
// See comments in InjectedClassNameType definition for details
// return Importer.getToContext().getInjectedClassNameType(D, InjType);
enum {
TypeAlignmentInBits = 4,
TypeAlignment = 1 << TypeAlignmentInBits
};
return QualType(new (Importer.getToContext(), TypeAlignment)
InjectedClassNameType(D, InjType), 0);
}
QualType ASTNodeImporter::VisitRecordType(const RecordType *T) {
RecordDecl *ToDecl
= dyn_cast_or_null<RecordDecl>(Importer.Import(T->getDecl()));
if (!ToDecl)
return QualType();
return Importer.getToContext().getTagDeclType(ToDecl);
}
QualType ASTNodeImporter::VisitEnumType(const EnumType *T) {
EnumDecl *ToDecl
= dyn_cast_or_null<EnumDecl>(Importer.Import(T->getDecl()));
if (!ToDecl)
return QualType();
return Importer.getToContext().getTagDeclType(ToDecl);
}
QualType ASTNodeImporter::VisitAttributedType(const AttributedType *T) {
QualType FromModifiedType = T->getModifiedType();
QualType FromEquivalentType = T->getEquivalentType();
QualType ToModifiedType;
QualType ToEquivalentType;
if (!FromModifiedType.isNull()) {
ToModifiedType = Importer.Import(FromModifiedType);
if (ToModifiedType.isNull())
return QualType();
}
if (!FromEquivalentType.isNull()) {
ToEquivalentType = Importer.Import(FromEquivalentType);
if (ToEquivalentType.isNull())
return QualType();
}
return Importer.getToContext().getAttributedType(T->getAttrKind(),
ToModifiedType, ToEquivalentType);
}
QualType ASTNodeImporter::VisitTemplateTypeParmType(
const TemplateTypeParmType *T) {
TemplateTypeParmDecl *ParmDecl =
cast_or_null<TemplateTypeParmDecl>(Importer.Import(T->getDecl()));
if (!ParmDecl && T->getDecl())
return QualType();
return Importer.getToContext().getTemplateTypeParmType(
T->getDepth(), T->getIndex(), T->isParameterPack(), ParmDecl);
}
QualType ASTNodeImporter::VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T) {
const TemplateTypeParmType *Replaced =
cast_or_null<TemplateTypeParmType>(Importer.Import(
QualType(T->getReplacedParameter(), 0)).getTypePtr());
if (!Replaced)
return QualType();
QualType Replacement = Importer.Import(T->getReplacementType());
if (Replacement.isNull())
return QualType();
Replacement = Replacement.getCanonicalType();
return Importer.getToContext().getSubstTemplateTypeParmType(
Replaced, Replacement);
}
QualType ASTNodeImporter::VisitTemplateSpecializationType(
const TemplateSpecializationType *T) {
TemplateName ToTemplate = Importer.Import(T->getTemplateName());
if (ToTemplate.isNull())
return QualType();
SmallVector<TemplateArgument, 2> ToTemplateArgs;
if (ImportTemplateArguments(T->getArgs(), T->getNumArgs(), ToTemplateArgs))
return QualType();
QualType ToCanonType;
if (!QualType(T, 0).isCanonical()) {
QualType FromCanonType
= Importer.getFromContext().getCanonicalType(QualType(T, 0));
ToCanonType = Importer.Import(FromCanonType);
if (ToCanonType.isNull())
return QualType();
}
return Importer.getToContext().getTemplateSpecializationType(ToTemplate,
ToTemplateArgs,
ToCanonType);
}
QualType ASTNodeImporter::VisitElaboratedType(const ElaboratedType *T) {
NestedNameSpecifier *ToQualifier = nullptr;
// Note: the qualifier in an ElaboratedType is optional.
if (T->getQualifier()) {
ToQualifier = Importer.Import(T->getQualifier());
if (!ToQualifier)
return QualType();
}
QualType ToNamedType = Importer.Import(T->getNamedType());
if (ToNamedType.isNull())
return QualType();
return Importer.getToContext().getElaboratedType(T->getKeyword(),
ToQualifier, ToNamedType);
}
QualType ASTNodeImporter::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
ObjCInterfaceDecl *Class
= dyn_cast_or_null<ObjCInterfaceDecl>(Importer.Import(T->getDecl()));
if (!Class)
return QualType();
return Importer.getToContext().getObjCInterfaceType(Class);
}
QualType ASTNodeImporter::VisitObjCObjectType(const ObjCObjectType *T) {
QualType ToBaseType = Importer.Import(T->getBaseType());
if (ToBaseType.isNull())
return QualType();
SmallVector<QualType, 4> TypeArgs;
for (auto TypeArg : T->getTypeArgsAsWritten()) {
QualType ImportedTypeArg = Importer.Import(TypeArg);
if (ImportedTypeArg.isNull())
return QualType();
TypeArgs.push_back(ImportedTypeArg);
}
SmallVector<ObjCProtocolDecl *, 4> Protocols;
for (auto *P : T->quals()) {
ObjCProtocolDecl *Protocol
= dyn_cast_or_null<ObjCProtocolDecl>(Importer.Import(P));
if (!Protocol)
return QualType();
Protocols.push_back(Protocol);
}
return Importer.getToContext().getObjCObjectType(ToBaseType, TypeArgs,
Protocols,
T->isKindOfTypeAsWritten());
}
QualType
ASTNodeImporter::VisitObjCObjectPointerType(const ObjCObjectPointerType *T) {
QualType ToPointeeType = Importer.Import(T->getPointeeType());
if (ToPointeeType.isNull())
return QualType();
return Importer.getToContext().getObjCObjectPointerType(ToPointeeType);
}
//----------------------------------------------------------------------------
// Import Declarations
//----------------------------------------------------------------------------
bool ASTNodeImporter::ImportDeclParts(NamedDecl *D, DeclContext *&DC,
DeclContext *&LexicalDC,
DeclarationName &Name,
NamedDecl *&ToD,
SourceLocation &Loc) {
// Import the context of this declaration.
DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return true;
LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return true;
}
// Import the name of this declaration.
Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return true;
// Import the location of this declaration.
Loc = Importer.Import(D->getLocation());
ToD = cast_or_null<NamedDecl>(Importer.GetAlreadyImportedOrNull(D));
return false;
}
void ASTNodeImporter::ImportDefinitionIfNeeded(Decl *FromD, Decl *ToD) {
if (!FromD)
return;
if (!ToD) {
ToD = Importer.Import(FromD);
if (!ToD)
return;
}
if (RecordDecl *FromRecord = dyn_cast<RecordDecl>(FromD)) {
if (RecordDecl *ToRecord = cast_or_null<RecordDecl>(ToD)) {
if (FromRecord->getDefinition() && FromRecord->isCompleteDefinition() && !ToRecord->getDefinition()) {
ImportDefinition(FromRecord, ToRecord);
}
}
return;
}
if (EnumDecl *FromEnum = dyn_cast<EnumDecl>(FromD)) {
if (EnumDecl *ToEnum = cast_or_null<EnumDecl>(ToD)) {
if (FromEnum->getDefinition() && !ToEnum->getDefinition()) {
ImportDefinition(FromEnum, ToEnum);
}
}
return;
}
}
void
ASTNodeImporter::ImportDeclarationNameLoc(const DeclarationNameInfo &From,
DeclarationNameInfo& To) {
// NOTE: To.Name and To.Loc are already imported.
// We only have to import To.LocInfo.
switch (To.getName().getNameKind()) {
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXUsingDirective:
case DeclarationName::CXXDeductionGuideName:
return;
case DeclarationName::CXXOperatorName: {
SourceRange Range = From.getCXXOperatorNameRange();
To.setCXXOperatorNameRange(Importer.Import(Range));
return;
}
case DeclarationName::CXXLiteralOperatorName: {
SourceLocation Loc = From.getCXXLiteralOperatorNameLoc();
To.setCXXLiteralOperatorNameLoc(Importer.Import(Loc));
return;
}
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName: {
TypeSourceInfo *FromTInfo = From.getNamedTypeInfo();
To.setNamedTypeInfo(Importer.Import(FromTInfo));
return;
}
}
llvm_unreachable("Unknown name kind.");
}
void ASTNodeImporter::ImportDeclContext(DeclContext *FromDC, bool ForceImport) {
if (Importer.isMinimalImport() && !ForceImport) {
Importer.ImportContext(FromDC);
return;
}
for (auto *From : FromDC->decls())
Importer.Import(From);
}
bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition() || To->isBeingDefined()) {
if (Kind == IDK_Everything)
ImportDeclContext(From, /*ForceImport=*/true);
return false;
}
To->startDefinition();
// Add base classes.
if (CXXRecordDecl *ToCXX = dyn_cast<CXXRecordDecl>(To)) {
CXXRecordDecl *FromCXX = cast<CXXRecordDecl>(From);
struct CXXRecordDecl::DefinitionData &ToData = ToCXX->data();
struct CXXRecordDecl::DefinitionData &FromData = FromCXX->data();
ToData.UserDeclaredConstructor = FromData.UserDeclaredConstructor;
ToData.UserDeclaredSpecialMembers = FromData.UserDeclaredSpecialMembers;
ToData.Aggregate = FromData.Aggregate;
ToData.PlainOldData = FromData.PlainOldData;
ToData.Empty = FromData.Empty;
ToData.Polymorphic = FromData.Polymorphic;
ToData.Abstract = FromData.Abstract;
ToData.IsStandardLayout = FromData.IsStandardLayout;
ToData.HasNoNonEmptyBases = FromData.HasNoNonEmptyBases;
ToData.HasPrivateFields = FromData.HasPrivateFields;
ToData.HasProtectedFields = FromData.HasProtectedFields;
ToData.HasPublicFields = FromData.HasPublicFields;
ToData.HasMutableFields = FromData.HasMutableFields;
ToData.HasVariantMembers = FromData.HasVariantMembers;
ToData.HasOnlyCMembers = FromData.HasOnlyCMembers;
ToData.HasInClassInitializer = FromData.HasInClassInitializer;
ToData.HasUninitializedReferenceMember
= FromData.HasUninitializedReferenceMember;
ToData.HasUninitializedFields = FromData.HasUninitializedFields;
ToData.HasInheritedConstructor = FromData.HasInheritedConstructor;
ToData.HasInheritedAssignment = FromData.HasInheritedAssignment;
+ ToData.NeedOverloadResolutionForCopyConstructor
+ = FromData.NeedOverloadResolutionForCopyConstructor;
ToData.NeedOverloadResolutionForMoveConstructor
= FromData.NeedOverloadResolutionForMoveConstructor;
ToData.NeedOverloadResolutionForMoveAssignment
= FromData.NeedOverloadResolutionForMoveAssignment;
ToData.NeedOverloadResolutionForDestructor
= FromData.NeedOverloadResolutionForDestructor;
+ ToData.DefaultedCopyConstructorIsDeleted
+ = FromData.DefaultedCopyConstructorIsDeleted;
ToData.DefaultedMoveConstructorIsDeleted
= FromData.DefaultedMoveConstructorIsDeleted;
ToData.DefaultedMoveAssignmentIsDeleted
= FromData.DefaultedMoveAssignmentIsDeleted;
ToData.DefaultedDestructorIsDeleted = FromData.DefaultedDestructorIsDeleted;
ToData.HasTrivialSpecialMembers = FromData.HasTrivialSpecialMembers;
ToData.HasIrrelevantDestructor = FromData.HasIrrelevantDestructor;
ToData.HasConstexprNonCopyMoveConstructor
= FromData.HasConstexprNonCopyMoveConstructor;
ToData.HasDefaultedDefaultConstructor
= FromData.HasDefaultedDefaultConstructor;
+ ToData.CanPassInRegisters = FromData.CanPassInRegisters;
ToData.DefaultedDefaultConstructorIsConstexpr
= FromData.DefaultedDefaultConstructorIsConstexpr;
ToData.HasConstexprDefaultConstructor
= FromData.HasConstexprDefaultConstructor;
ToData.HasNonLiteralTypeFieldsOrBases
= FromData.HasNonLiteralTypeFieldsOrBases;
// ComputedVisibleConversions not imported.
ToData.UserProvidedDefaultConstructor
= FromData.UserProvidedDefaultConstructor;
ToData.DeclaredSpecialMembers = FromData.DeclaredSpecialMembers;
ToData.ImplicitCopyConstructorCanHaveConstParamForVBase
= FromData.ImplicitCopyConstructorCanHaveConstParamForVBase;
ToData.ImplicitCopyConstructorCanHaveConstParamForNonVBase
= FromData.ImplicitCopyConstructorCanHaveConstParamForNonVBase;
ToData.ImplicitCopyAssignmentHasConstParam
= FromData.ImplicitCopyAssignmentHasConstParam;
ToData.HasDeclaredCopyConstructorWithConstParam
= FromData.HasDeclaredCopyConstructorWithConstParam;
ToData.HasDeclaredCopyAssignmentWithConstParam
= FromData.HasDeclaredCopyAssignmentWithConstParam;
ToData.IsLambda = FromData.IsLambda;
SmallVector<CXXBaseSpecifier *, 4> Bases;
for (const auto &Base1 : FromCXX->bases()) {
QualType T = Importer.Import(Base1.getType());
if (T.isNull())
return true;
SourceLocation EllipsisLoc;
if (Base1.isPackExpansion())
EllipsisLoc = Importer.Import(Base1.getEllipsisLoc());
// Ensure that we have a definition for the base.
ImportDefinitionIfNeeded(Base1.getType()->getAsCXXRecordDecl());
Bases.push_back(
new (Importer.getToContext())
CXXBaseSpecifier(Importer.Import(Base1.getSourceRange()),
Base1.isVirtual(),
Base1.isBaseOfClass(),
Base1.getAccessSpecifierAsWritten(),
Importer.Import(Base1.getTypeSourceInfo()),
EllipsisLoc));
}
if (!Bases.empty())
ToCXX->setBases(Bases.data(), Bases.size());
}
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From, /*ForceImport=*/true);
To->completeDefinition();
return false;
}
bool ASTNodeImporter::ImportDefinition(VarDecl *From, VarDecl *To,
ImportDefinitionKind Kind) {
if (To->getAnyInitializer())
return false;
// FIXME: Can we really import any initializer? Alternatively, we could force
// ourselves to import every declaration of a variable and then only use
// getInit() here.
To->setInit(Importer.Import(const_cast<Expr *>(From->getAnyInitializer())));
// FIXME: Other bits to merge?
return false;
}
bool ASTNodeImporter::ImportDefinition(EnumDecl *From, EnumDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition() || To->isBeingDefined()) {
if (Kind == IDK_Everything)
ImportDeclContext(From, /*ForceImport=*/true);
return false;
}
To->startDefinition();
QualType T = Importer.Import(Importer.getFromContext().getTypeDeclType(From));
if (T.isNull())
return true;
QualType ToPromotionType = Importer.Import(From->getPromotionType());
if (ToPromotionType.isNull())
return true;
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From, /*ForceImport=*/true);
// FIXME: we might need to merge the number of positive or negative bits
// if the enumerator lists don't match.
To->completeDefinition(T, ToPromotionType,
From->getNumPositiveBits(),
From->getNumNegativeBits());
return false;
}
TemplateParameterList *ASTNodeImporter::ImportTemplateParameterList(
TemplateParameterList *Params) {
SmallVector<NamedDecl *, 4> ToParams(Params->size());
if (ImportContainerChecked(*Params, ToParams))
return nullptr;
Expr *ToRequiresClause;
if (Expr *const R = Params->getRequiresClause()) {
ToRequiresClause = Importer.Import(R);
if (!ToRequiresClause)
return nullptr;
} else {
ToRequiresClause = nullptr;
}
return TemplateParameterList::Create(Importer.getToContext(),
Importer.Import(Params->getTemplateLoc()),
Importer.Import(Params->getLAngleLoc()),
ToParams,
Importer.Import(Params->getRAngleLoc()),
ToRequiresClause);
}
TemplateArgument
ASTNodeImporter::ImportTemplateArgument(const TemplateArgument &From) {
switch (From.getKind()) {
case TemplateArgument::Null:
return TemplateArgument();
case TemplateArgument::Type: {
QualType ToType = Importer.Import(From.getAsType());
if (ToType.isNull())
return TemplateArgument();
return TemplateArgument(ToType);
}
case TemplateArgument::Integral: {
QualType ToType = Importer.Import(From.getIntegralType());
if (ToType.isNull())
return TemplateArgument();
return TemplateArgument(From, ToType);
}
case TemplateArgument::Declaration: {
ValueDecl *To = cast_or_null<ValueDecl>(Importer.Import(From.getAsDecl()));
QualType ToType = Importer.Import(From.getParamTypeForDecl());
if (!To || ToType.isNull())
return TemplateArgument();
return TemplateArgument(To, ToType);
}
case TemplateArgument::NullPtr: {
QualType ToType = Importer.Import(From.getNullPtrType());
if (ToType.isNull())
return TemplateArgument();
return TemplateArgument(ToType, /*isNullPtr*/true);
}
case TemplateArgument::Template: {
TemplateName ToTemplate = Importer.Import(From.getAsTemplate());
if (ToTemplate.isNull())
return TemplateArgument();
return TemplateArgument(ToTemplate);
}
case TemplateArgument::TemplateExpansion: {
TemplateName ToTemplate
= Importer.Import(From.getAsTemplateOrTemplatePattern());
if (ToTemplate.isNull())
return TemplateArgument();
return TemplateArgument(ToTemplate, From.getNumTemplateExpansions());
}
case TemplateArgument::Expression:
if (Expr *ToExpr = Importer.Import(From.getAsExpr()))
return TemplateArgument(ToExpr);
return TemplateArgument();
case TemplateArgument::Pack: {
SmallVector<TemplateArgument, 2> ToPack;
ToPack.reserve(From.pack_size());
if (ImportTemplateArguments(From.pack_begin(), From.pack_size(), ToPack))
return TemplateArgument();
return TemplateArgument(
llvm::makeArrayRef(ToPack).copy(Importer.getToContext()));
}
}
llvm_unreachable("Invalid template argument kind");
}
TemplateArgumentLoc ASTNodeImporter::ImportTemplateArgumentLoc(
const TemplateArgumentLoc &TALoc, bool &Error) {
Error = false;
TemplateArgument Arg = ImportTemplateArgument(TALoc.getArgument());
TemplateArgumentLocInfo FromInfo = TALoc.getLocInfo();
TemplateArgumentLocInfo ToInfo;
if (Arg.getKind() == TemplateArgument::Expression) {
Expr *E = Importer.Import(FromInfo.getAsExpr());
ToInfo = TemplateArgumentLocInfo(E);
if (!E)
Error = true;
} else if (Arg.getKind() == TemplateArgument::Type) {
if (TypeSourceInfo *TSI = Importer.Import(FromInfo.getAsTypeSourceInfo()))
ToInfo = TemplateArgumentLocInfo(TSI);
else
Error = true;
} else {
ToInfo = TemplateArgumentLocInfo(
Importer.Import(FromInfo.getTemplateQualifierLoc()),
Importer.Import(FromInfo.getTemplateNameLoc()),
Importer.Import(FromInfo.getTemplateEllipsisLoc()));
}
return TemplateArgumentLoc(Arg, ToInfo);
}
bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs,
unsigned NumFromArgs,
SmallVectorImpl<TemplateArgument> &ToArgs) {
for (unsigned I = 0; I != NumFromArgs; ++I) {
TemplateArgument To = ImportTemplateArgument(FromArgs[I]);
if (To.isNull() && !FromArgs[I].isNull())
return true;
ToArgs.push_back(To);
}
return false;
}
bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord,
RecordDecl *ToRecord, bool Complain) {
// Eliminate a potential failure point where we attempt to re-import
// something we're trying to import while completing ToRecord.
Decl *ToOrigin = Importer.GetOriginalDecl(ToRecord);
if (ToOrigin) {
RecordDecl *ToOriginRecord = dyn_cast<RecordDecl>(ToOrigin);
if (ToOriginRecord)
ToRecord = ToOriginRecord;
}
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
ToRecord->getASTContext(),
Importer.getNonEquivalentDecls(),
false, Complain);
return Ctx.IsStructurallyEquivalent(FromRecord, ToRecord);
}
bool ASTNodeImporter::IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
bool Complain) {
StructuralEquivalenceContext Ctx(
Importer.getFromContext(), Importer.getToContext(),
Importer.getNonEquivalentDecls(), false, Complain);
return Ctx.IsStructurallyEquivalent(FromVar, ToVar);
}
bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) {
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
Importer.getToContext(),
Importer.getNonEquivalentDecls());
return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum);
}
bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC,
EnumConstantDecl *ToEC) {
const llvm::APSInt &FromVal = FromEC->getInitVal();
const llvm::APSInt &ToVal = ToEC->getInitVal();
return FromVal.isSigned() == ToVal.isSigned() &&
FromVal.getBitWidth() == ToVal.getBitWidth() &&
FromVal == ToVal;
}
bool ASTNodeImporter::IsStructuralMatch(ClassTemplateDecl *From,
ClassTemplateDecl *To) {
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
Importer.getToContext(),
Importer.getNonEquivalentDecls());
return Ctx.IsStructurallyEquivalent(From, To);
}
bool ASTNodeImporter::IsStructuralMatch(VarTemplateDecl *From,
VarTemplateDecl *To) {
StructuralEquivalenceContext Ctx(Importer.getFromContext(),
Importer.getToContext(),
Importer.getNonEquivalentDecls());
return Ctx.IsStructurallyEquivalent(From, To);
}
Decl *ASTNodeImporter::VisitDecl(Decl *D) {
Importer.FromDiag(D->getLocation(), diag::err_unsupported_ast_node)
<< D->getDeclKindName();
return nullptr;
}
Decl *ASTNodeImporter::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
TranslationUnitDecl *ToD =
Importer.getToContext().getTranslationUnitDecl();
Importer.Imported(D, ToD);
return ToD;
}
Decl *ASTNodeImporter::VisitAccessSpecDecl(AccessSpecDecl *D) {
SourceLocation Loc = Importer.Import(D->getLocation());
SourceLocation ColonLoc = Importer.Import(D->getColonLoc());
// Import the context of this declaration.
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
AccessSpecDecl *accessSpecDecl
= AccessSpecDecl::Create(Importer.getToContext(), D->getAccess(),
DC, Loc, ColonLoc);
if (!accessSpecDecl)
return nullptr;
// Lexical DeclContext and Semantic DeclContext
// is always the same for the accessSpec.
accessSpecDecl->setLexicalDeclContext(DC);
DC->addDeclInternal(accessSpecDecl);
return accessSpecDecl;
}
Decl *ASTNodeImporter::VisitStaticAssertDecl(StaticAssertDecl *D) {
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
DeclContext *LexicalDC = DC;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
Expr *AssertExpr = Importer.Import(D->getAssertExpr());
if (!AssertExpr)
return nullptr;
StringLiteral *FromMsg = D->getMessage();
StringLiteral *ToMsg = cast_or_null<StringLiteral>(Importer.Import(FromMsg));
if (!ToMsg && FromMsg)
return nullptr;
StaticAssertDecl *ToD = StaticAssertDecl::Create(
Importer.getToContext(), DC, Loc, AssertExpr, ToMsg,
Importer.Import(D->getRParenLoc()), D->isFailed());
ToD->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToD);
Importer.Imported(D, ToD);
return ToD;
}
Decl *ASTNodeImporter::VisitNamespaceDecl(NamespaceDecl *D) {
// Import the major distinguishing characteristics of this namespace.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
NamespaceDecl *MergeWithNamespace = nullptr;
if (!Name) {
// This is an anonymous namespace. Adopt an existing anonymous
// namespace if we can.
// FIXME: Not testable.
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(DC))
MergeWithNamespace = TU->getAnonymousNamespace();
else
MergeWithNamespace = cast<NamespaceDecl>(DC)->getAnonymousNamespace();
} else {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Namespace))
continue;
if (NamespaceDecl *FoundNS = dyn_cast<NamespaceDecl>(FoundDecls[I])) {
MergeWithNamespace = FoundNS;
ConflictingDecls.clear();
break;
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Namespace,
ConflictingDecls.data(),
ConflictingDecls.size());
}
}
// Create the "to" namespace, if needed.
NamespaceDecl *ToNamespace = MergeWithNamespace;
if (!ToNamespace) {
ToNamespace = NamespaceDecl::Create(Importer.getToContext(), DC,
D->isInline(),
Importer.Import(D->getLocStart()),
Loc, Name.getAsIdentifierInfo(),
/*PrevDecl=*/nullptr);
ToNamespace->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToNamespace);
// If this is an anonymous namespace, register it as the anonymous
// namespace within its context.
if (!Name) {
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(DC))
TU->setAnonymousNamespace(ToNamespace);
else
cast<NamespaceDecl>(DC)->setAnonymousNamespace(ToNamespace);
}
}
Importer.Imported(D, ToNamespace);
ImportDeclContext(D);
return ToNamespace;
}
Decl *ASTNodeImporter::VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias) {
// Import the major distinguishing characteristics of this typedef.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// If this typedef is not in block scope, determine whether we've
// seen a typedef with the same name (that we can merge with) or any
// other entity by that name (which name lookup could conflict with).
if (!DC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (TypedefNameDecl *FoundTypedef =
dyn_cast<TypedefNameDecl>(FoundDecls[I])) {
if (Importer.IsStructurallyEquivalent(D->getUnderlyingType(),
FoundTypedef->getUnderlyingType()))
return Importer.Imported(D, FoundTypedef);
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
// Import the underlying type of this typedef.
QualType T = Importer.Import(D->getUnderlyingType());
if (T.isNull())
return nullptr;
// Create the new typedef node.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
SourceLocation StartL = Importer.Import(D->getLocStart());
TypedefNameDecl *ToTypedef;
if (IsAlias)
ToTypedef = TypeAliasDecl::Create(Importer.getToContext(), DC,
StartL, Loc,
Name.getAsIdentifierInfo(),
TInfo);
else
ToTypedef = TypedefDecl::Create(Importer.getToContext(), DC,
StartL, Loc,
Name.getAsIdentifierInfo(),
TInfo);
ToTypedef->setAccess(D->getAccess());
ToTypedef->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToTypedef);
LexicalDC->addDeclInternal(ToTypedef);
return ToTypedef;
}
Decl *ASTNodeImporter::VisitTypedefDecl(TypedefDecl *D) {
return VisitTypedefNameDecl(D, /*IsAlias=*/false);
}
Decl *ASTNodeImporter::VisitTypeAliasDecl(TypeAliasDecl *D) {
return VisitTypedefNameDecl(D, /*IsAlias=*/true);
}
Decl *ASTNodeImporter::VisitLabelDecl(LabelDecl *D) {
// Import the major distinguishing characteristics of this label.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
assert(LexicalDC->isFunctionOrMethod());
LabelDecl *ToLabel = D->isGnuLocal()
? LabelDecl::Create(Importer.getToContext(),
DC, Importer.Import(D->getLocation()),
Name.getAsIdentifierInfo(),
Importer.Import(D->getLocStart()))
: LabelDecl::Create(Importer.getToContext(),
DC, Importer.Import(D->getLocation()),
Name.getAsIdentifierInfo());
Importer.Imported(D, ToLabel);
LabelStmt *Label = cast_or_null<LabelStmt>(Importer.Import(D->getStmt()));
if (!Label)
return nullptr;
ToLabel->setStmt(Label);
ToLabel->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToLabel);
return ToLabel;
}
Decl *ASTNodeImporter::VisitEnumDecl(EnumDecl *D) {
// Import the major distinguishing characteristics of this enum.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Figure out what enum name we're looking for.
unsigned IDNS = Decl::IDNS_Tag;
DeclarationName SearchName = Name;
if (!SearchName && D->getTypedefNameForAnonDecl()) {
SearchName = Importer.Import(D->getTypedefNameForAnonDecl()->getDeclName());
IDNS = Decl::IDNS_Ordinary;
} else if (Importer.getToContext().getLangOpts().CPlusPlus)
IDNS |= Decl::IDNS_Ordinary;
// We may already have an enum of the same name; try to find and match it.
if (!DC->isFunctionOrMethod() && SearchName) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(SearchName, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
Decl *Found = FoundDecls[I];
if (TypedefNameDecl *Typedef = dyn_cast<TypedefNameDecl>(Found)) {
if (const TagType *Tag = Typedef->getUnderlyingType()->getAs<TagType>())
Found = Tag->getDecl();
}
if (EnumDecl *FoundEnum = dyn_cast<EnumDecl>(Found)) {
if (IsStructuralMatch(D, FoundEnum))
return Importer.Imported(D, FoundEnum);
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
}
}
// Create the enum declaration.
EnumDecl *D2 = EnumDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocStart()),
Loc, Name.getAsIdentifierInfo(), nullptr,
D->isScoped(), D->isScopedUsingClassTag(),
D->isFixed());
// Import the qualifier, if any.
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
D2->setAccess(D->getAccess());
D2->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, D2);
LexicalDC->addDeclInternal(D2);
// Import the integer type.
QualType ToIntegerType = Importer.Import(D->getIntegerType());
if (ToIntegerType.isNull())
return nullptr;
D2->setIntegerType(ToIntegerType);
// Import the definition
if (D->isCompleteDefinition() && ImportDefinition(D, D2))
return nullptr;
return D2;
}
Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
// If this record has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
TagDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of this record.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Figure out what structure name we're looking for.
unsigned IDNS = Decl::IDNS_Tag;
DeclarationName SearchName = Name;
if (!SearchName && D->getTypedefNameForAnonDecl()) {
SearchName = Importer.Import(D->getTypedefNameForAnonDecl()->getDeclName());
IDNS = Decl::IDNS_Ordinary;
} else if (Importer.getToContext().getLangOpts().CPlusPlus)
IDNS |= Decl::IDNS_Ordinary;
// We may already have a record of the same name; try to find and match it.
RecordDecl *AdoptDecl = nullptr;
RecordDecl *PrevDecl = nullptr;
if (!DC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(SearchName, FoundDecls);
if (!FoundDecls.empty()) {
// We're going to have to compare D against potentially conflicting Decls,
// so complete it.
if (D->hasExternalLexicalStorage() && !D->isCompleteDefinition())
D->getASTContext().getExternalSource()->CompleteType(D);
}
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
Decl *Found = FoundDecls[I];
if (TypedefNameDecl *Typedef = dyn_cast<TypedefNameDecl>(Found)) {
if (const TagType *Tag = Typedef->getUnderlyingType()->getAs<TagType>())
Found = Tag->getDecl();
}
if (RecordDecl *FoundRecord = dyn_cast<RecordDecl>(Found)) {
if (D->isAnonymousStructOrUnion() &&
FoundRecord->isAnonymousStructOrUnion()) {
// If both anonymous structs/unions are in a record context, make sure
// they occur in the same location in the context records.
if (Optional<unsigned> Index1 =
StructuralEquivalenceContext::findUntaggedStructOrUnionIndex(
D)) {
if (Optional<unsigned> Index2 = StructuralEquivalenceContext::
findUntaggedStructOrUnionIndex(FoundRecord)) {
if (*Index1 != *Index2)
continue;
}
}
}
PrevDecl = FoundRecord;
if (RecordDecl *FoundDef = FoundRecord->getDefinition()) {
if ((SearchName && !D->isCompleteDefinition())
|| (D->isCompleteDefinition() &&
D->isAnonymousStructOrUnion()
== FoundDef->isAnonymousStructOrUnion() &&
IsStructuralMatch(D, FoundDef))) {
// The record types structurally match, or the "from" translation
// unit only had a forward declaration anyway; treat them as the
// same type.
// FIXME: For C++, we should also merge methods here.
return Importer.Imported(D, FoundDef);
}
} else if (!D->isCompleteDefinition()) {
// We have a forward declaration of this type, so adopt that forward
// declaration rather than building a new one.
// If one or both can be completed from external storage then try one
// last time to complete and compare them before doing this.
if (FoundRecord->hasExternalLexicalStorage() &&
!FoundRecord->isCompleteDefinition())
FoundRecord->getASTContext().getExternalSource()->CompleteType(FoundRecord);
if (D->hasExternalLexicalStorage())
D->getASTContext().getExternalSource()->CompleteType(D);
if (FoundRecord->isCompleteDefinition() &&
D->isCompleteDefinition() &&
!IsStructuralMatch(D, FoundRecord))
continue;
AdoptDecl = FoundRecord;
continue;
} else if (!SearchName) {
continue;
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty() && SearchName) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
}
}
// Create the record declaration.
RecordDecl *D2 = AdoptDecl;
SourceLocation StartLoc = Importer.Import(D->getLocStart());
if (!D2) {
CXXRecordDecl *D2CXX = nullptr;
if (CXXRecordDecl *DCXX = llvm::dyn_cast<CXXRecordDecl>(D)) {
if (DCXX->isLambda()) {
TypeSourceInfo *TInfo = Importer.Import(DCXX->getLambdaTypeInfo());
D2CXX = CXXRecordDecl::CreateLambda(Importer.getToContext(),
DC, TInfo, Loc,
DCXX->isDependentLambda(),
DCXX->isGenericLambda(),
DCXX->getLambdaCaptureDefault());
Decl *CDecl = Importer.Import(DCXX->getLambdaContextDecl());
if (DCXX->getLambdaContextDecl() && !CDecl)
return nullptr;
D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), CDecl);
} else if (DCXX->isInjectedClassName()) {
// We have to be careful to do a similar dance to the one in
// Sema::ActOnStartCXXMemberDeclarations
CXXRecordDecl *const PrevDecl = nullptr;
const bool DelayTypeCreation = true;
D2CXX = CXXRecordDecl::Create(
Importer.getToContext(), D->getTagKind(), DC, StartLoc, Loc,
Name.getAsIdentifierInfo(), PrevDecl, DelayTypeCreation);
Importer.getToContext().getTypeDeclType(
D2CXX, llvm::dyn_cast<CXXRecordDecl>(DC));
} else {
D2CXX = CXXRecordDecl::Create(Importer.getToContext(),
D->getTagKind(),
DC, StartLoc, Loc,
Name.getAsIdentifierInfo());
}
D2 = D2CXX;
D2->setAccess(D->getAccess());
} else {
D2 = RecordDecl::Create(Importer.getToContext(), D->getTagKind(),
DC, StartLoc, Loc, Name.getAsIdentifierInfo());
}
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
if (D->isAnonymousStructOrUnion())
D2->setAnonymousStructOrUnion(true);
if (PrevDecl) {
// FIXME: do this for all Redeclarables, not just RecordDecls.
D2->setPreviousDecl(PrevDecl);
}
}
Importer.Imported(D, D2);
if (D->isCompleteDefinition() && ImportDefinition(D, D2, IDK_Default))
return nullptr;
return D2;
}
Decl *ASTNodeImporter::VisitEnumConstantDecl(EnumConstantDecl *D) {
// Import the major distinguishing characteristics of this enumerator.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Determine whether there are any other declarations with the same name and
// in the same context.
if (!LexicalDC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (EnumConstantDecl *FoundEnumConstant
= dyn_cast<EnumConstantDecl>(FoundDecls[I])) {
if (IsStructuralMatch(D, FoundEnumConstant))
return Importer.Imported(D, FoundEnumConstant);
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
Expr *Init = Importer.Import(D->getInitExpr());
if (D->getInitExpr() && !Init)
return nullptr;
EnumConstantDecl *ToEnumerator
= EnumConstantDecl::Create(Importer.getToContext(), cast<EnumDecl>(DC), Loc,
Name.getAsIdentifierInfo(), T,
Init, D->getInitVal());
ToEnumerator->setAccess(D->getAccess());
ToEnumerator->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToEnumerator);
LexicalDC->addDeclInternal(ToEnumerator);
return ToEnumerator;
}
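// Function import: an existing function with the same name, external formal
// linkage, and structurally equivalent type is reused. In C++ a mismatched
// type is treated as an overload; in C it is diagnosed as an ODR violation.
// Exception specifications that refer back to the function itself are
// stripped before the type is imported and restored afterwards to break the
// recursion.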
Decl *ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
// Import the major distinguishing characteristics of this function.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Try to find a function in our own ("to") context with the same name, same
// type, and in the same context as the function we're importing.
if (!LexicalDC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (FunctionDecl *FoundFunction = dyn_cast<FunctionDecl>(FoundDecls[I])) {
if (FoundFunction->hasExternalFormalLinkage() &&
D->hasExternalFormalLinkage()) {
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundFunction->getType())) {
// FIXME: Actually try to merge the body and other attributes.
return Importer.Imported(D, FoundFunction);
}
// FIXME: Check for overloading more carefully, e.g., by boosting
// Sema::IsOverload out to the AST library.
// Function overloading is okay in C++.
if (Importer.getToContext().getLangOpts().CPlusPlus)
continue;
// Complain about inconsistent function types.
Importer.ToDiag(Loc, diag::err_odr_function_type_inconsistent)
<< Name << D->getType() << FoundFunction->getType();
Importer.ToDiag(FoundFunction->getLocation(),
diag::note_odr_value_here)
<< FoundFunction->getType();
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
DeclarationNameInfo NameInfo(Name, Loc);
// Import additional name location/type info.
ImportDeclarationNameLoc(D->getNameInfo(), NameInfo);
QualType FromTy = D->getType();
bool usedDifferentExceptionSpec = false;
if (const FunctionProtoType *
FromFPT = D->getType()->getAs<FunctionProtoType>()) {
FunctionProtoType::ExtProtoInfo FromEPI = FromFPT->getExtProtoInfo();
// FunctionProtoType::ExtProtoInfo's ExceptionSpecDecl can point to the
// FunctionDecl that we are importing the FunctionProtoType for.
// To avoid an infinite recursion when importing, create the FunctionDecl
// with a simplified function type and update it afterwards.
if (FromEPI.ExceptionSpec.SourceDecl ||
FromEPI.ExceptionSpec.SourceTemplate ||
FromEPI.ExceptionSpec.NoexceptExpr) {
FunctionProtoType::ExtProtoInfo DefaultEPI;
FromTy = Importer.getFromContext().getFunctionType(
FromFPT->getReturnType(), FromFPT->getParamTypes(), DefaultEPI);
usedDifferentExceptionSpec = true;
}
}
// Import the type.
QualType T = Importer.Import(FromTy);
if (T.isNull())
return nullptr;
// Import the function parameters.
SmallVector<ParmVarDecl *, 8> Parameters;
for (auto P : D->parameters()) {
ParmVarDecl *ToP = cast_or_null<ParmVarDecl>(Importer.Import(P));
if (!ToP)
return nullptr;
Parameters.push_back(ToP);
}
// Create the imported function.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
FunctionDecl *ToFunction = nullptr;
SourceLocation InnerLocStart = Importer.Import(D->getInnerLocStart());
if (CXXConstructorDecl *FromConstructor = dyn_cast<CXXConstructorDecl>(D)) {
ToFunction = CXXConstructorDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
FromConstructor->isExplicit(),
D->isInlineSpecified(),
D->isImplicit(),
D->isConstexpr());
if (unsigned NumInitializers = FromConstructor->getNumCtorInitializers()) {
SmallVector<CXXCtorInitializer *, 4> CtorInitializers;
for (CXXCtorInitializer *I : FromConstructor->inits()) {
CXXCtorInitializer *ToI =
cast_or_null<CXXCtorInitializer>(Importer.Import(I));
if (!ToI && I)
return nullptr;
CtorInitializers.push_back(ToI);
}
CXXCtorInitializer **Memory =
new (Importer.getToContext()) CXXCtorInitializer *[NumInitializers];
std::copy(CtorInitializers.begin(), CtorInitializers.end(), Memory);
CXXConstructorDecl *ToCtor = llvm::cast<CXXConstructorDecl>(ToFunction);
ToCtor->setCtorInitializers(Memory);
ToCtor->setNumCtorInitializers(NumInitializers);
}
} else if (isa<CXXDestructorDecl>(D)) {
ToFunction = CXXDestructorDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
D->isInlineSpecified(),
D->isImplicit());
} else if (CXXConversionDecl *FromConversion
= dyn_cast<CXXConversionDecl>(D)) {
ToFunction = CXXConversionDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
D->isInlineSpecified(),
FromConversion->isExplicit(),
D->isConstexpr(),
Importer.Import(D->getLocEnd()));
} else if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
ToFunction = CXXMethodDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
InnerLocStart,
NameInfo, T, TInfo,
Method->getStorageClass(),
Method->isInlineSpecified(),
D->isConstexpr(),
Importer.Import(D->getLocEnd()));
} else {
ToFunction = FunctionDecl::Create(Importer.getToContext(), DC,
InnerLocStart,
NameInfo, T, TInfo, D->getStorageClass(),
D->isInlineSpecified(),
D->hasWrittenPrototype(),
D->isConstexpr());
}
// Import the qualifier, if any.
ToFunction->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
ToFunction->setAccess(D->getAccess());
ToFunction->setLexicalDeclContext(LexicalDC);
ToFunction->setVirtualAsWritten(D->isVirtualAsWritten());
ToFunction->setTrivial(D->isTrivial());
ToFunction->setPure(D->isPure());
Importer.Imported(D, ToFunction);
// Set the parameters.
for (unsigned I = 0, N = Parameters.size(); I != N; ++I) {
Parameters[I]->setOwningFunction(ToFunction);
ToFunction->addDeclInternal(Parameters[I]);
}
ToFunction->setParams(Parameters);
if (usedDifferentExceptionSpec) {
// Update FunctionProtoType::ExtProtoInfo.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
ToFunction->setType(T);
}
// Import the body, if any.
if (Stmt *FromBody = D->getBody()) {
if (Stmt *ToBody = Importer.Import(FromBody)) {
ToFunction->setBody(ToBody);
}
}
// FIXME: Other bits to merge?
// Add this function to the lexical context.
LexicalDC->addDeclInternal(ToFunction);
if (auto *FromCXXMethod = dyn_cast<CXXMethodDecl>(D))
ImportOverrides(cast<CXXMethodDecl>(ToFunction), FromCXXMethod);
return ToFunction;
}
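// C++ methods, constructors, destructors, and conversion functions all
// funnel through the generic function-import path above.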
Decl *ASTNodeImporter::VisitCXXMethodDecl(CXXMethodDecl *D) {
return VisitFunctionDecl(D);
}
Decl *ASTNodeImporter::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *ASTNodeImporter::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *ASTNodeImporter::VisitCXXConversionDecl(CXXConversionDecl *D) {
return VisitCXXMethodDecl(D);
}
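// Returns the 1-based position of F among the field-like declarations of its
// owning record (0 if F is not in a record at all). Anonymous fields have no
// name to look up, so the importer matches them across translation units by
// this index.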
static unsigned getFieldIndex(Decl *F) {
RecordDecl *Owner = dyn_cast<RecordDecl>(F->getDeclContext());
if (!Owner)
return 0;
unsigned Index = 1;
for (const auto *D : Owner->noload_decls()) {
if (D == F)
return Index;
if (isa<FieldDecl>(*D) || isa<IndirectFieldDecl>(*D))
++Index;
}
return Index;
}
Decl *ASTNodeImporter::VisitFieldDecl(FieldDecl *D) {
// Import the major distinguishing characteristics of a variable.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Determine whether we've already imported this field.
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (FieldDecl *FoundField = dyn_cast<FieldDecl>(FoundDecls[I])) {
// For anonymous fields, match up by index.
if (!Name && getFieldIndex(D) != getFieldIndex(FoundField))
continue;
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundField->getType())) {
Importer.Imported(D, FoundField);
return FoundField;
}
Importer.ToDiag(Loc, diag::err_odr_field_type_inconsistent)
<< Name << D->getType() << FoundField->getType();
Importer.ToDiag(FoundField->getLocation(), diag::note_odr_value_here)
<< FoundField->getType();
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
Expr *BitWidth = Importer.Import(D->getBitWidth());
if (!BitWidth && D->getBitWidth())
return nullptr;
FieldDecl *ToField = FieldDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo, BitWidth, D->isMutable(),
D->getInClassInitStyle());
ToField->setAccess(D->getAccess());
ToField->setLexicalDeclContext(LexicalDC);
if (Expr *FromInitializer = D->getInClassInitializer()) {
Expr *ToInitializer = Importer.Import(FromInitializer);
if (ToInitializer)
ToField->setInClassInitializer(ToInitializer);
else
return nullptr;
}
ToField->setImplicit(D->isImplicit());
Importer.Imported(D, ToField);
LexicalDC->addDeclInternal(ToField);
return ToField;
}
Decl *ASTNodeImporter::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
// Import the major distinguishing characteristics of a variable.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Determine whether we've already imported this field.
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (IndirectFieldDecl *FoundField
= dyn_cast<IndirectFieldDecl>(FoundDecls[I])) {
// For anonymous indirect fields, match up by index.
if (!Name && getFieldIndex(D) != getFieldIndex(FoundField))
continue;
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundField->getType(),
!Name.isEmpty())) {
Importer.Imported(D, FoundField);
return FoundField;
}
// If there are more anonymous fields to check, continue.
if (!Name && I < N-1)
continue;
Importer.ToDiag(Loc, diag::err_odr_field_type_inconsistent)
<< Name << D->getType() << FoundField->getType();
Importer.ToDiag(FoundField->getLocation(), diag::note_odr_value_here)
<< FoundField->getType();
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
NamedDecl **NamedChain =
new (Importer.getToContext()) NamedDecl *[D->getChainingSize()];
unsigned i = 0;
for (auto *PI : D->chain()) {
Decl *ToChainD = Importer.Import(PI);
if (!ToChainD)
return nullptr;
NamedChain[i++] = cast<NamedDecl>(ToChainD);
}
IndirectFieldDecl *ToIndirectField = IndirectFieldDecl::Create(
Importer.getToContext(), DC, Loc, Name.getAsIdentifierInfo(), T,
{NamedChain, D->getChainingSize()});
for (const auto *Attr : D->attrs())
ToIndirectField->addAttr(Attr->clone(Importer.getToContext()));
ToIndirectField->setAccess(D->getAccess());
ToIndirectField->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToIndirectField);
LexicalDC->addDeclInternal(ToIndirectField);
return ToIndirectField;
}
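// Friend declarations are not NamedDecls, so they are deduplicated by
// walking the destination record's friend chain and checking structural
// equivalence of the friended decl or type; trailing template parameter
// lists are imported along with the friend.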
Decl *ASTNodeImporter::VisitFriendDecl(FriendDecl *D) {
// Import the major distinguishing characteristics of a declaration.
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
DeclContext *LexicalDC = D->getDeclContext() == D->getLexicalDeclContext()
? DC : Importer.ImportContext(D->getLexicalDeclContext());
if (!DC || !LexicalDC)
return nullptr;
// Determine whether we've already imported this decl.
// FriendDecl is not a NamedDecl so we cannot use localUncachedLookup.
auto *RD = cast<CXXRecordDecl>(DC);
FriendDecl *ImportedFriend = RD->getFirstFriend();
StructuralEquivalenceContext Context(
Importer.getFromContext(), Importer.getToContext(),
Importer.getNonEquivalentDecls(), false, false);
while (ImportedFriend) {
if (D->getFriendDecl() && ImportedFriend->getFriendDecl()) {
if (Context.IsStructurallyEquivalent(D->getFriendDecl(),
ImportedFriend->getFriendDecl()))
return Importer.Imported(D, ImportedFriend);
} else if (D->getFriendType() && ImportedFriend->getFriendType()) {
if (Importer.IsStructurallyEquivalent(
D->getFriendType()->getType(),
ImportedFriend->getFriendType()->getType(), true))
return Importer.Imported(D, ImportedFriend);
}
ImportedFriend = ImportedFriend->getNextFriend();
}
// Not found. Create it.
FriendDecl::FriendUnion ToFU;
if (NamedDecl *FriendD = D->getFriendDecl())
ToFU = cast_or_null<NamedDecl>(Importer.Import(FriendD));
else
ToFU = Importer.Import(D->getFriendType());
if (!ToFU)
return nullptr;
SmallVector<TemplateParameterList *, 1> ToTPLists(D->NumTPLists);
TemplateParameterList **FromTPLists =
D->getTrailingObjects<TemplateParameterList *>();
for (unsigned I = 0; I < D->NumTPLists; I++) {
TemplateParameterList *List = ImportTemplateParameterList(FromTPLists[I]);
if (!List)
return nullptr;
ToTPLists[I] = List;
}
FriendDecl *FrD = FriendDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocation()),
ToFU, Importer.Import(D->getFriendLoc()),
ToTPLists);
Importer.Imported(D, FrD);
RD->pushFriendDecl(FrD);
FrD->setAccess(D->getAccess());
FrD->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(FrD);
return FrD;
}
Decl *ASTNodeImporter::VisitObjCIvarDecl(ObjCIvarDecl *D) {
// Import the major distinguishing characteristics of an ivar.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Determine whether we've already imported this ivar
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (ObjCIvarDecl *FoundIvar = dyn_cast<ObjCIvarDecl>(FoundDecls[I])) {
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundIvar->getType())) {
Importer.Imported(D, FoundIvar);
return FoundIvar;
}
Importer.ToDiag(Loc, diag::err_odr_ivar_type_inconsistent)
<< Name << D->getType() << FoundIvar->getType();
Importer.ToDiag(FoundIvar->getLocation(), diag::note_odr_value_here)
<< FoundIvar->getType();
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
Expr *BitWidth = Importer.Import(D->getBitWidth());
if (!BitWidth && D->getBitWidth())
return nullptr;
ObjCIvarDecl *ToIvar = ObjCIvarDecl::Create(Importer.getToContext(),
cast<ObjCContainerDecl>(DC),
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo, D->getAccessControl(),
BitWidth, D->getSynthesize());
ToIvar->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToIvar);
LexicalDC->addDeclInternal(ToIvar);
return ToIvar;
}
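// Variable import: file-scope variables with external formal linkage are
// merged with an equivalent existing declaration. An incomplete array type
// on either side is reconciled with the constant array type on the other
// (tentative-definition style), and duplicate definitions with initializers
// are diagnosed as ODR violations.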
Decl *ASTNodeImporter::VisitVarDecl(VarDecl *D) {
// Import the major distinguishing characteristics of a variable.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Try to find a variable in our own ("to") context with the same name and
// in the same context as the variable we're importing.
if (D->isFileVarDecl()) {
VarDecl *MergeWithVar = nullptr;
SmallVector<NamedDecl *, 4> ConflictingDecls;
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;
if (VarDecl *FoundVar = dyn_cast<VarDecl>(FoundDecls[I])) {
// We have found a variable that we may need to merge with. Check it.
if (FoundVar->hasExternalFormalLinkage() &&
D->hasExternalFormalLinkage()) {
if (Importer.IsStructurallyEquivalent(D->getType(),
FoundVar->getType())) {
MergeWithVar = FoundVar;
break;
}
const ArrayType *FoundArray
= Importer.getToContext().getAsArrayType(FoundVar->getType());
const ArrayType *TArray
= Importer.getToContext().getAsArrayType(D->getType());
if (FoundArray && TArray) {
if (isa<IncompleteArrayType>(FoundArray) &&
isa<ConstantArrayType>(TArray)) {
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
FoundVar->setType(T);
MergeWithVar = FoundVar;
break;
} else if (isa<IncompleteArrayType>(TArray) &&
isa<ConstantArrayType>(FoundArray)) {
MergeWithVar = FoundVar;
break;
}
}
Importer.ToDiag(Loc, diag::err_odr_variable_type_inconsistent)
<< Name << D->getType() << FoundVar->getType();
Importer.ToDiag(FoundVar->getLocation(), diag::note_odr_value_here)
<< FoundVar->getType();
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (MergeWithVar) {
// An equivalent variable with external linkage has been found. Link
// the two declarations, then merge them.
Importer.Imported(D, MergeWithVar);
if (VarDecl *DDef = D->getDefinition()) {
if (VarDecl *ExistingDef = MergeWithVar->getDefinition()) {
Importer.ToDiag(ExistingDef->getLocation(),
diag::err_odr_variable_multiple_def)
<< Name;
Importer.FromDiag(DDef->getLocation(), diag::note_odr_defined_here);
} else {
Expr *Init = Importer.Import(DDef->getInit());
MergeWithVar->setInit(Init);
if (DDef->isInitKnownICE()) {
EvaluatedStmt *Eval = MergeWithVar->ensureEvaluatedStmt();
Eval->CheckedICE = true;
Eval->IsICE = DDef->isInitICE();
}
}
}
return MergeWithVar;
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, IDNS,
ConflictingDecls.data(),
ConflictingDecls.size());
if (!Name)
return nullptr;
}
}
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Create the imported variable.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
VarDecl *ToVar = VarDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo,
D->getStorageClass());
ToVar->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
ToVar->setAccess(D->getAccess());
ToVar->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToVar);
LexicalDC->addDeclInternal(ToVar);
if (!D->isFileVarDecl() &&
D->isUsed())
ToVar->setIsUsed();
// Merge the initializer.
if (ImportDefinition(D, ToVar))
return nullptr;
if (D->isConstexpr())
ToVar->setConstexpr(true);
return ToVar;
}
Decl *ASTNodeImporter::VisitImplicitParamDecl(ImplicitParamDecl *D) {
// Parameters are created in the translation unit's context, then moved
// into the function declaration's context afterward.
DeclContext *DC = Importer.getToContext().getTranslationUnitDecl();
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import the parameter's type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Create the imported parameter.
auto *ToParm = ImplicitParamDecl::Create(Importer.getToContext(), DC, Loc,
Name.getAsIdentifierInfo(), T,
D->getParameterKind());
return Importer.Imported(D, ToParm);
}
Decl *ASTNodeImporter::VisitParmVarDecl(ParmVarDecl *D) {
// Parameters are created in the translation unit's context, then moved
// into the function declaration's context afterward.
DeclContext *DC = Importer.getToContext().getTranslationUnitDecl();
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import the parameter's type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Create the imported parameter.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
ParmVarDecl *ToParm = ParmVarDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getInnerLocStart()),
Loc, Name.getAsIdentifierInfo(),
T, TInfo, D->getStorageClass(),
/*DefaultArg*/ nullptr);
// Set the default argument.
ToParm->setHasInheritedDefaultArg(D->hasInheritedDefaultArg());
ToParm->setKNRPromoted(D->isKNRPromoted());
Expr *ToDefArg = nullptr;
Expr *FromDefArg = nullptr;
if (D->hasUninstantiatedDefaultArg()) {
FromDefArg = D->getUninstantiatedDefaultArg();
ToDefArg = Importer.Import(FromDefArg);
ToParm->setUninstantiatedDefaultArg(ToDefArg);
} else if (D->hasUnparsedDefaultArg()) {
ToParm->setUnparsedDefaultArg();
} else if (D->hasDefaultArg()) {
FromDefArg = D->getDefaultArg();
ToDefArg = Importer.Import(FromDefArg);
ToParm->setDefaultArg(ToDefArg);
}
if (FromDefArg && !ToDefArg)
return nullptr;
if (D->isUsed())
ToParm->setIsUsed();
return Importer.Imported(D, ToParm);
}
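// Objective-C methods merge with an existing method of the same selector and
// instance/class kind only when the return type, parameter count, parameter
// types, and variadicness all agree; any mismatch is an ODR error, since
// Objective-C has no overloading to fall back on.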
Decl *ASTNodeImporter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
// Import the major distinguishing characteristics of a method.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (ObjCMethodDecl *FoundMethod = dyn_cast<ObjCMethodDecl>(FoundDecls[I])) {
if (FoundMethod->isInstanceMethod() != D->isInstanceMethod())
continue;
// Check return types.
if (!Importer.IsStructurallyEquivalent(D->getReturnType(),
FoundMethod->getReturnType())) {
Importer.ToDiag(Loc, diag::err_odr_objc_method_result_type_inconsistent)
<< D->isInstanceMethod() << Name << D->getReturnType()
<< FoundMethod->getReturnType();
Importer.ToDiag(FoundMethod->getLocation(),
diag::note_odr_objc_method_here)
<< D->isInstanceMethod() << Name;
return nullptr;
}
// Check the number of parameters.
if (D->param_size() != FoundMethod->param_size()) {
Importer.ToDiag(Loc, diag::err_odr_objc_method_num_params_inconsistent)
<< D->isInstanceMethod() << Name
<< D->param_size() << FoundMethod->param_size();
Importer.ToDiag(FoundMethod->getLocation(),
diag::note_odr_objc_method_here)
<< D->isInstanceMethod() << Name;
return nullptr;
}
// Check parameter types.
for (ObjCMethodDecl::param_iterator P = D->param_begin(),
PEnd = D->param_end(), FoundP = FoundMethod->param_begin();
P != PEnd; ++P, ++FoundP) {
if (!Importer.IsStructurallyEquivalent((*P)->getType(),
(*FoundP)->getType())) {
Importer.FromDiag((*P)->getLocation(),
diag::err_odr_objc_method_param_type_inconsistent)
<< D->isInstanceMethod() << Name
<< (*P)->getType() << (*FoundP)->getType();
Importer.ToDiag((*FoundP)->getLocation(), diag::note_odr_value_here)
<< (*FoundP)->getType();
return nullptr;
}
}
// Check variadic/non-variadic.
if (D->isVariadic() != FoundMethod->isVariadic()) {
Importer.ToDiag(Loc, diag::err_odr_objc_method_variadic_inconsistent)
<< D->isInstanceMethod() << Name;
Importer.ToDiag(FoundMethod->getLocation(),
diag::note_odr_objc_method_here)
<< D->isInstanceMethod() << Name;
return nullptr;
}
// FIXME: Any other bits we need to merge?
return Importer.Imported(D, FoundMethod);
}
}
// Import the result type.
QualType ResultTy = Importer.Import(D->getReturnType());
if (ResultTy.isNull())
return nullptr;
TypeSourceInfo *ReturnTInfo = Importer.Import(D->getReturnTypeSourceInfo());
ObjCMethodDecl *ToMethod = ObjCMethodDecl::Create(
Importer.getToContext(), Loc, Importer.Import(D->getLocEnd()),
Name.getObjCSelector(), ResultTy, ReturnTInfo, DC, D->isInstanceMethod(),
D->isVariadic(), D->isPropertyAccessor(), D->isImplicit(), D->isDefined(),
D->getImplementationControl(), D->hasRelatedResultType());
// FIXME: When we decide to merge method definitions, we'll need to
// deal with implicit parameters.
// Import the parameters
SmallVector<ParmVarDecl *, 5> ToParams;
for (auto *FromP : D->parameters()) {
ParmVarDecl *ToP = cast_or_null<ParmVarDecl>(Importer.Import(FromP));
if (!ToP)
return nullptr;
ToParams.push_back(ToP);
}
// Set the parameters.
for (unsigned I = 0, N = ToParams.size(); I != N; ++I) {
ToParams[I]->setOwningFunction(ToMethod);
ToMethod->addDeclInternal(ToParams[I]);
}
SmallVector<SourceLocation, 12> SelLocs;
D->getSelectorLocs(SelLocs);
ToMethod->setMethodParams(Importer.getToContext(), ToParams, SelLocs);
ToMethod->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToMethod);
LexicalDC->addDeclInternal(ToMethod);
return ToMethod;
}
Decl *ASTNodeImporter::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) {
// Import the major distinguishing characteristics of a type parameter.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
TypeSourceInfo *BoundInfo = Importer.Import(D->getTypeSourceInfo());
if (!BoundInfo)
return nullptr;
ObjCTypeParamDecl *Result = ObjCTypeParamDecl::Create(
Importer.getToContext(), DC,
D->getVariance(),
Importer.Import(D->getVarianceLoc()),
D->getIndex(),
Importer.Import(D->getLocation()),
Name.getAsIdentifierInfo(),
Importer.Import(D->getColonLoc()),
BoundInfo);
Importer.Imported(D, Result);
Result->setLexicalDeclContext(LexicalDC);
return Result;
}
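// Categories are keyed by name on their class interface: an existing
// category with the same name is merged with rather than duplicated, and the
// category's protocol list, members, and implementation are imported along
// with it.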
Decl *ASTNodeImporter::VisitObjCCategoryDecl(ObjCCategoryDecl *D) {
// Import the major distinguishing characteristics of a category.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
ObjCInterfaceDecl *ToInterface
= cast_or_null<ObjCInterfaceDecl>(Importer.Import(D->getClassInterface()));
if (!ToInterface)
return nullptr;
// Determine if we've already encountered this category.
ObjCCategoryDecl *MergeWithCategory
= ToInterface->FindCategoryDeclaration(Name.getAsIdentifierInfo());
ObjCCategoryDecl *ToCategory = MergeWithCategory;
if (!ToCategory) {
ToCategory = ObjCCategoryDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getAtStartLoc()),
Loc,
Importer.Import(D->getCategoryNameLoc()),
Name.getAsIdentifierInfo(),
ToInterface,
/*TypeParamList=*/nullptr,
Importer.Import(D->getIvarLBraceLoc()),
Importer.Import(D->getIvarRBraceLoc()));
ToCategory->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToCategory);
Importer.Imported(D, ToCategory);
// Import the type parameter list after calling Imported, to avoid
// loops when bringing in their DeclContext.
ToCategory->setTypeParamList(ImportObjCTypeParamList(
D->getTypeParamList()));
// Import protocols
SmallVector<ObjCProtocolDecl *, 4> Protocols;
SmallVector<SourceLocation, 4> ProtocolLocs;
ObjCCategoryDecl::protocol_loc_iterator FromProtoLoc
= D->protocol_loc_begin();
for (ObjCCategoryDecl::protocol_iterator FromProto = D->protocol_begin(),
FromProtoEnd = D->protocol_end();
FromProto != FromProtoEnd;
++FromProto, ++FromProtoLoc) {
ObjCProtocolDecl *ToProto
= cast_or_null<ObjCProtocolDecl>(Importer.Import(*FromProto));
if (!ToProto)
return nullptr;
Protocols.push_back(ToProto);
ProtocolLocs.push_back(Importer.Import(*FromProtoLoc));
}
// FIXME: If we're merging, make sure that the protocol list is the same.
ToCategory->setProtocolList(Protocols.data(), Protocols.size(),
ProtocolLocs.data(), Importer.getToContext());
} else {
Importer.Imported(D, ToCategory);
}
// Import all of the members of this category.
ImportDeclContext(D);
// If we have an implementation, import it as well.
if (D->getImplementation()) {
ObjCCategoryImplDecl *Impl
= cast_or_null<ObjCCategoryImplDecl>(
Importer.Import(D->getImplementation()));
if (!Impl)
return nullptr;
ToCategory->setImplementation(Impl);
}
return ToCategory;
}
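// Imports a protocol definition: the referenced protocol list is brought
// over eagerly, while member import is deferred unless the import kind
// forces the whole DeclContext.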
bool ASTNodeImporter::ImportDefinition(ObjCProtocolDecl *From,
ObjCProtocolDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition()) {
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From);
return false;
}
// Start the protocol definition
To->startDefinition();
// Import protocols
SmallVector<ObjCProtocolDecl *, 4> Protocols;
SmallVector<SourceLocation, 4> ProtocolLocs;
ObjCProtocolDecl::protocol_loc_iterator
FromProtoLoc = From->protocol_loc_begin();
for (ObjCProtocolDecl::protocol_iterator FromProto = From->protocol_begin(),
FromProtoEnd = From->protocol_end();
FromProto != FromProtoEnd;
++FromProto, ++FromProtoLoc) {
ObjCProtocolDecl *ToProto
= cast_or_null<ObjCProtocolDecl>(Importer.Import(*FromProto));
if (!ToProto)
return true;
Protocols.push_back(ToProto);
ProtocolLocs.push_back(Importer.Import(*FromProtoLoc));
}
// FIXME: If we're merging, make sure that the protocol list is the same.
To->setProtocolList(Protocols.data(), Protocols.size(),
ProtocolLocs.data(), Importer.getToContext());
if (shouldForceImportDeclContext(Kind)) {
// Import all of the members of this protocol.
ImportDeclContext(From, /*ForceImport=*/true);
}
return false;
}
Decl *ASTNodeImporter::VisitObjCProtocolDecl(ObjCProtocolDecl *D) {
// If this protocol has a definition in the translation unit we're coming
// from, but this particular declaration is not that definition, import the
// definition and map to that.
ObjCProtocolDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of a protocol.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
ObjCProtocolDecl *MergeWithProtocol = nullptr;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_ObjCProtocol))
continue;
if ((MergeWithProtocol = dyn_cast<ObjCProtocolDecl>(FoundDecls[I])))
break;
}
ObjCProtocolDecl *ToProto = MergeWithProtocol;
if (!ToProto) {
ToProto = ObjCProtocolDecl::Create(Importer.getToContext(), DC,
Name.getAsIdentifierInfo(), Loc,
Importer.Import(D->getAtStartLoc()),
/*PrevDecl=*/nullptr);
ToProto->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToProto);
}
Importer.Imported(D, ToProto);
if (D->isThisDeclarationADefinition() && ImportDefinition(D, ToProto))
return nullptr;
return ToProto;
}
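// Linkage specifications (extern "C" / extern "C++") carry no name, so no
// merging is attempted; brace locations are preserved when the spec has
// braces.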
Decl *ASTNodeImporter::VisitLinkageSpecDecl(LinkageSpecDecl *D) {
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
DeclContext *LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
SourceLocation ExternLoc = Importer.Import(D->getExternLoc());
SourceLocation LangLoc = Importer.Import(D->getLocation());
bool HasBraces = D->hasBraces();
LinkageSpecDecl *ToLinkageSpec =
LinkageSpecDecl::Create(Importer.getToContext(),
DC,
ExternLoc,
LangLoc,
D->getLanguage(),
HasBraces);
if (HasBraces) {
SourceLocation RBraceLoc = Importer.Import(D->getRBraceLoc());
ToLinkageSpec->setRBraceLoc(RBraceLoc);
}
ToLinkageSpec->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToLinkageSpec);
Importer.Imported(D, ToLinkageSpec);
return ToLinkageSpec;
}
bool ASTNodeImporter::ImportDefinition(ObjCInterfaceDecl *From,
ObjCInterfaceDecl *To,
ImportDefinitionKind Kind) {
if (To->getDefinition()) {
// Check consistency of superclass.
ObjCInterfaceDecl *FromSuper = From->getSuperClass();
if (FromSuper) {
FromSuper = cast_or_null<ObjCInterfaceDecl>(Importer.Import(FromSuper));
if (!FromSuper)
return true;
}
ObjCInterfaceDecl *ToSuper = To->getSuperClass();
if ((bool)FromSuper != (bool)ToSuper ||
(FromSuper && !declaresSameEntity(FromSuper, ToSuper))) {
Importer.ToDiag(To->getLocation(),
diag::err_odr_objc_superclass_inconsistent)
<< To->getDeclName();
if (ToSuper)
Importer.ToDiag(To->getSuperClassLoc(), diag::note_odr_objc_superclass)
<< To->getSuperClass()->getDeclName();
else
Importer.ToDiag(To->getLocation(),
diag::note_odr_objc_missing_superclass);
if (From->getSuperClass())
Importer.FromDiag(From->getSuperClassLoc(),
diag::note_odr_objc_superclass)
<< From->getSuperClass()->getDeclName();
else
Importer.FromDiag(From->getLocation(),
diag::note_odr_objc_missing_superclass);
}
if (shouldForceImportDeclContext(Kind))
ImportDeclContext(From);
return false;
}
// Start the definition.
To->startDefinition();
// If this class has a superclass, import it.
if (From->getSuperClass()) {
TypeSourceInfo *SuperTInfo = Importer.Import(From->getSuperClassTInfo());
if (!SuperTInfo)
return true;
To->setSuperClass(SuperTInfo);
}
// Import protocols
SmallVector<ObjCProtocolDecl *, 4> Protocols;
SmallVector<SourceLocation, 4> ProtocolLocs;
ObjCInterfaceDecl::protocol_loc_iterator
FromProtoLoc = From->protocol_loc_begin();
for (ObjCInterfaceDecl::protocol_iterator FromProto = From->protocol_begin(),
FromProtoEnd = From->protocol_end();
FromProto != FromProtoEnd;
++FromProto, ++FromProtoLoc) {
ObjCProtocolDecl *ToProto
= cast_or_null<ObjCProtocolDecl>(Importer.Import(*FromProto));
if (!ToProto)
return true;
Protocols.push_back(ToProto);
ProtocolLocs.push_back(Importer.Import(*FromProtoLoc));
}
// FIXME: If we're merging, make sure that the protocol list is the same.
To->setProtocolList(Protocols.data(), Protocols.size(),
ProtocolLocs.data(), Importer.getToContext());
// Import categories. When the categories themselves are imported, they'll
// hook themselves into this interface.
for (auto *Cat : From->known_categories())
Importer.Import(Cat);
// If we have an @implementation, import it as well.
if (From->getImplementation()) {
ObjCImplementationDecl *Impl = cast_or_null<ObjCImplementationDecl>(
Importer.Import(From->getImplementation()));
if (!Impl)
return true;
To->setImplementation(Impl);
}
if (shouldForceImportDeclContext(Kind)) {
// Import all of the members of this class.
ImportDeclContext(From, /*ForceImport=*/true);
}
return false;
}
ObjCTypeParamList *
ASTNodeImporter::ImportObjCTypeParamList(ObjCTypeParamList *list) {
if (!list)
return nullptr;
SmallVector<ObjCTypeParamDecl *, 4> toTypeParams;
for (auto fromTypeParam : *list) {
auto toTypeParam = cast_or_null<ObjCTypeParamDecl>(
Importer.Import(fromTypeParam));
if (!toTypeParam)
return nullptr;
toTypeParams.push_back(toTypeParam);
}
return ObjCTypeParamList::create(Importer.getToContext(),
Importer.Import(list->getLAngleLoc()),
toTypeParams,
Importer.Import(list->getRAngleLoc()));
}
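// Interface import mirrors the record case: declarations are redirected to
// the definition, an existing interface of the same name is merged with, and
// the type parameter list is imported only after the mapping is registered,
// to avoid cycles through its DeclContext.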
Decl *ASTNodeImporter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) {
// If this class has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
ObjCInterfaceDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of an @interface.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Look for an existing interface with the same name.
ObjCInterfaceDecl *MergeWithIface = nullptr;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Ordinary))
continue;
if ((MergeWithIface = dyn_cast<ObjCInterfaceDecl>(FoundDecls[I])))
break;
}
// Create an interface declaration, if one does not already exist.
ObjCInterfaceDecl *ToIface = MergeWithIface;
if (!ToIface) {
ToIface = ObjCInterfaceDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getAtStartLoc()),
Name.getAsIdentifierInfo(),
/*TypeParamList=*/nullptr,
/*PrevDecl=*/nullptr, Loc,
D->isImplicitInterfaceDecl());
ToIface->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToIface);
}
Importer.Imported(D, ToIface);
// Import the type parameter list after calling Imported, to avoid
// loops when bringing in their DeclContext.
ToIface->setTypeParamList(ImportObjCTypeParamList(
D->getTypeParamListAsWritten()));
if (D->isThisDeclarationADefinition() && ImportDefinition(D, ToIface))
return nullptr;
return ToIface;
}
Decl *ASTNodeImporter::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) {
ObjCCategoryDecl *Category = cast_or_null<ObjCCategoryDecl>(
Importer.Import(D->getCategoryDecl()));
if (!Category)
return nullptr;
ObjCCategoryImplDecl *ToImpl = Category->getImplementation();
if (!ToImpl) {
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
SourceLocation CategoryNameLoc = Importer.Import(D->getCategoryNameLoc());
ToImpl = ObjCCategoryImplDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getIdentifier()),
Category->getClassInterface(),
Importer.Import(D->getLocation()),
Importer.Import(D->getAtStartLoc()),
CategoryNameLoc);
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
ToImpl->setLexicalDeclContext(LexicalDC);
}
LexicalDC->addDeclInternal(ToImpl);
Category->setImplementation(ToImpl);
}
Importer.Imported(D, ToImpl);
ImportDeclContext(D);
return ToImpl;
}
Decl *ASTNodeImporter::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
// Find the corresponding interface.
ObjCInterfaceDecl *Iface = cast_or_null<ObjCInterfaceDecl>(
Importer.Import(D->getClassInterface()));
if (!Iface)
return nullptr;
// Import the superclass, if any.
ObjCInterfaceDecl *Super = nullptr;
if (D->getSuperClass()) {
Super = cast_or_null<ObjCInterfaceDecl>(
Importer.Import(D->getSuperClass()));
if (!Super)
return nullptr;
}
ObjCImplementationDecl *Impl = Iface->getImplementation();
if (!Impl) {
// We haven't imported an implementation yet. Create a new @implementation
// now.
Impl = ObjCImplementationDecl::Create(Importer.getToContext(),
Importer.ImportContext(D->getDeclContext()),
Iface, Super,
Importer.Import(D->getLocation()),
Importer.Import(D->getAtStartLoc()),
Importer.Import(D->getSuperClassLoc()),
Importer.Import(D->getIvarLBraceLoc()),
Importer.Import(D->getIvarRBraceLoc()));
if (D->getDeclContext() != D->getLexicalDeclContext()) {
DeclContext *LexicalDC
= Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
Impl->setLexicalDeclContext(LexicalDC);
}
// Associate the implementation with the class it implements.
Iface->setImplementation(Impl);
Importer.Imported(D, Iface->getImplementation());
} else {
Importer.Imported(D, Iface->getImplementation());
// Verify that the existing @implementation has the same superclass.
if ((Super && !Impl->getSuperClass()) ||
(!Super && Impl->getSuperClass()) ||
(Super && Impl->getSuperClass() &&
!declaresSameEntity(Super->getCanonicalDecl(),
Impl->getSuperClass()))) {
Importer.ToDiag(Impl->getLocation(),
diag::err_odr_objc_superclass_inconsistent)
<< Iface->getDeclName();
// FIXME: It would be nice to have the location of the superclass
// below.
if (Impl->getSuperClass())
Importer.ToDiag(Impl->getLocation(),
diag::note_odr_objc_superclass)
<< Impl->getSuperClass()->getDeclName();
else
Importer.ToDiag(Impl->getLocation(),
diag::note_odr_objc_missing_superclass);
if (D->getSuperClass())
Importer.FromDiag(D->getLocation(),
diag::note_odr_objc_superclass)
<< D->getSuperClass()->getDeclName();
else
Importer.FromDiag(D->getLocation(),
diag::note_odr_objc_missing_superclass);
return nullptr;
}
}
// Import all of the members of this @implementation.
ImportDeclContext(D);
return Impl;
}
Decl *ASTNodeImporter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
// Import the major distinguishing characteristics of an @property.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// Check whether we have already imported this property.
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (ObjCPropertyDecl *FoundProp
= dyn_cast<ObjCPropertyDecl>(FoundDecls[I])) {
// Check property types.
if (!Importer.IsStructurallyEquivalent(D->getType(),
FoundProp->getType())) {
Importer.ToDiag(Loc, diag::err_odr_objc_property_type_inconsistent)
<< Name << D->getType() << FoundProp->getType();
Importer.ToDiag(FoundProp->getLocation(), diag::note_odr_value_here)
<< FoundProp->getType();
return nullptr;
}
// FIXME: Check property attributes, getters, setters, etc.?
// Consider these properties to be equivalent.
Importer.Imported(D, FoundProp);
return FoundProp;
}
}
// Import the type.
TypeSourceInfo *TSI = Importer.Import(D->getTypeSourceInfo());
if (!TSI)
return nullptr;
// Create the new property.
ObjCPropertyDecl *ToProperty
= ObjCPropertyDecl::Create(Importer.getToContext(), DC, Loc,
Name.getAsIdentifierInfo(),
Importer.Import(D->getAtLoc()),
Importer.Import(D->getLParenLoc()),
Importer.Import(D->getType()),
TSI,
D->getPropertyImplementation());
Importer.Imported(D, ToProperty);
ToProperty->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(ToProperty);
ToProperty->setPropertyAttributes(D->getPropertyAttributes());
ToProperty->setPropertyAttributesAsWritten(
D->getPropertyAttributesAsWritten());
ToProperty->setGetterName(Importer.Import(D->getGetterName()),
Importer.Import(D->getGetterNameLoc()));
ToProperty->setSetterName(Importer.Import(D->getSetterName()),
Importer.Import(D->getSetterNameLoc()));
ToProperty->setGetterMethodDecl(
cast_or_null<ObjCMethodDecl>(Importer.Import(D->getGetterMethodDecl())));
ToProperty->setSetterMethodDecl(
cast_or_null<ObjCMethodDecl>(Importer.Import(D->getSetterMethodDecl())));
ToProperty->setPropertyIvarDecl(
cast_or_null<ObjCIvarDecl>(Importer.Import(D->getPropertyIvarDecl())));
return ToProperty;
}
Decl *ASTNodeImporter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
ObjCPropertyDecl *Property = cast_or_null<ObjCPropertyDecl>(
Importer.Import(D->getPropertyDecl()));
if (!Property)
return nullptr;
DeclContext *DC = Importer.ImportContext(D->getDeclContext());
if (!DC)
return nullptr;
// Import the lexical declaration context.
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
}
ObjCImplDecl *InImpl = dyn_cast<ObjCImplDecl>(LexicalDC);
if (!InImpl)
return nullptr;
// Import the ivar (for an @synthesize).
ObjCIvarDecl *Ivar = nullptr;
if (D->getPropertyIvarDecl()) {
Ivar = cast_or_null<ObjCIvarDecl>(
Importer.Import(D->getPropertyIvarDecl()));
if (!Ivar)
return nullptr;
}
ObjCPropertyImplDecl *ToImpl
= InImpl->FindPropertyImplDecl(Property->getIdentifier(),
Property->getQueryKind());
if (!ToImpl) {
ToImpl = ObjCPropertyImplDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocStart()),
Importer.Import(D->getLocation()),
Property,
D->getPropertyImplementation(),
Ivar,
Importer.Import(D->getPropertyIvarDeclLoc()));
ToImpl->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToImpl);
LexicalDC->addDeclInternal(ToImpl);
} else {
// Check that we have the same kind of property implementation (@synthesize
// vs. @dynamic).
if (D->getPropertyImplementation() != ToImpl->getPropertyImplementation()) {
Importer.ToDiag(ToImpl->getLocation(),
diag::err_odr_objc_property_impl_kind_inconsistent)
<< Property->getDeclName()
<< (ToImpl->getPropertyImplementation()
== ObjCPropertyImplDecl::Dynamic);
Importer.FromDiag(D->getLocation(),
diag::note_odr_objc_property_impl_kind)
<< D->getPropertyDecl()->getDeclName()
<< (D->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic);
return nullptr;
}
// For @synthesize, check that we have the same ivar.
if (D->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize &&
Ivar != ToImpl->getPropertyIvarDecl()) {
Importer.ToDiag(ToImpl->getPropertyIvarDeclLoc(),
diag::err_odr_objc_synthesize_ivar_inconsistent)
<< Property->getDeclName()
<< ToImpl->getPropertyIvarDecl()->getDeclName()
<< Ivar->getDeclName();
Importer.FromDiag(D->getPropertyIvarDeclLoc(),
diag::note_odr_objc_synthesize_ivar_here)
<< D->getPropertyIvarDecl()->getDeclName();
return nullptr;
}
// Merge the existing implementation with the new implementation.
Importer.Imported(D, ToImpl);
}
return ToImpl;
}
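// Template parameters are created with the translation unit as a placeholder
// DeclContext; the enclosing template declaration fixes the context up once
// it is built. Default arguments are not imported yet (see the FIXMEs).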
Decl *ASTNodeImporter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
// For template arguments, we adopt the translation unit as our declaration
// context. This context will be fixed when the actual template declaration
// is created.
// FIXME: Import default argument.
return TemplateTypeParmDecl::Create(Importer.getToContext(),
Importer.getToContext().getTranslationUnitDecl(),
Importer.Import(D->getLocStart()),
Importer.Import(D->getLocation()),
D->getDepth(),
D->getIndex(),
Importer.Import(D->getIdentifier()),
D->wasDeclaredWithTypename(),
D->isParameterPack());
}
Decl *
ASTNodeImporter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import the type of this declaration.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
// Import type-source information.
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
if (D->getTypeSourceInfo() && !TInfo)
return nullptr;
// FIXME: Import default argument.
return NonTypeTemplateParmDecl::Create(Importer.getToContext(),
Importer.getToContext().getTranslationUnitDecl(),
Importer.Import(D->getInnerLocStart()),
Loc, D->getDepth(), D->getPosition(),
Name.getAsIdentifierInfo(),
T, D->isParameterPack(), TInfo);
}
Decl *
ASTNodeImporter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
// Import the name of this declaration.
DeclarationName Name = Importer.Import(D->getDeclName());
if (D->getDeclName() && !Name)
return nullptr;
// Import the location of this declaration.
SourceLocation Loc = Importer.Import(D->getLocation());
// Import template parameters.
TemplateParameterList *TemplateParams
= ImportTemplateParameterList(D->getTemplateParameters());
if (!TemplateParams)
return nullptr;
// FIXME: Import default argument.
return TemplateTemplateParmDecl::Create(Importer.getToContext(),
Importer.getToContext().getTranslationUnitDecl(),
Loc, D->getDepth(), D->getPosition(),
D->isParameterPack(),
Name.getAsIdentifierInfo(),
TemplateParams);
}
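// Class template import: the templated CXXRecordDecl is imported first,
// which can recursively re-enter this template, hence the explicit
// cyclic-import check before the ClassTemplateDecl itself is created.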
Decl *ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
// If this record has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
CXXRecordDecl *Definition
= cast_or_null<CXXRecordDecl>(D->getTemplatedDecl()->getDefinition());
if (Definition && Definition != D->getTemplatedDecl()) {
Decl *ImportedDef
= Importer.Import(Definition->getDescribedClassTemplate());
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of this class template.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// We may already have a template of the same name; try to find and match it.
if (!DC->isFunctionOrMethod()) {
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Ordinary))
continue;
Decl *Found = FoundDecls[I];
if (ClassTemplateDecl *FoundTemplate
= dyn_cast<ClassTemplateDecl>(Found)) {
if (IsStructuralMatch(D, FoundTemplate)) {
// The class templates structurally match; call it the same template.
// FIXME: We may be filling in a forward declaration here. Handle
// this case!
Importer.Imported(D->getTemplatedDecl(),
FoundTemplate->getTemplatedDecl());
return Importer.Imported(D, FoundTemplate);
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Ordinary,
ConflictingDecls.data(),
ConflictingDecls.size());
}
if (!Name)
return nullptr;
}
CXXRecordDecl *DTemplated = D->getTemplatedDecl();
// Create the declaration that is being templated.
CXXRecordDecl *D2Templated = cast_or_null<CXXRecordDecl>(
Importer.Import(DTemplated));
if (!D2Templated)
return nullptr;
// Resolve possible cyclic import.
if (Decl *AlreadyImported = Importer.GetAlreadyImportedOrNull(D))
return AlreadyImported;
// Create the class template declaration itself.
TemplateParameterList *TemplateParams
= ImportTemplateParameterList(D->getTemplateParameters());
if (!TemplateParams)
return nullptr;
ClassTemplateDecl *D2 = ClassTemplateDecl::Create(Importer.getToContext(), DC,
Loc, Name, TemplateParams,
D2Templated);
D2Templated->setDescribedClassTemplate(D2);
D2->setAccess(D->getAccess());
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
// Note the relationship between the class templates.
Importer.Imported(D, D2);
Importer.Imported(DTemplated, D2Templated);
if (DTemplated->isCompleteDefinition() &&
!D2Templated->isCompleteDefinition()) {
// FIXME: Import definition!
}
return D2;
}
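// Specializations are looked up on the imported primary template via
// findSpecialization; partial specializations additionally import their
// as-written template arguments, template parameter list, and injected
// specialization type.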
Decl *ASTNodeImporter::VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
// If this record has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
TagDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
ClassTemplateDecl *ClassTemplate
= cast_or_null<ClassTemplateDecl>(Importer.Import(
D->getSpecializedTemplate()));
if (!ClassTemplate)
return nullptr;
// Import the context of this declaration.
DeclContext *DC = ClassTemplate->getDeclContext();
if (!DC)
return nullptr;
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
}
// Import the location of this declaration.
SourceLocation StartLoc = Importer.Import(D->getLocStart());
SourceLocation IdLoc = Importer.Import(D->getLocation());
// Import template arguments.
SmallVector<TemplateArgument, 2> TemplateArgs;
if (ImportTemplateArguments(D->getTemplateArgs().data(),
D->getTemplateArgs().size(),
TemplateArgs))
return nullptr;
// Try to find an existing specialization with these template arguments.
void *InsertPos = nullptr;
ClassTemplateSpecializationDecl *D2
= ClassTemplate->findSpecialization(TemplateArgs, InsertPos);
if (D2) {
// We already have a class template specialization with these template
// arguments.
// FIXME: Check for specialization vs. instantiation errors.
if (RecordDecl *FoundDef = D2->getDefinition()) {
if (!D->isCompleteDefinition() || IsStructuralMatch(D, FoundDef)) {
// The record types structurally match, or the "from" translation
// unit only had a forward declaration anyway; call it the same
// specialization.
return Importer.Imported(D, FoundDef);
}
}
} else {
// Create a new specialization.
if (ClassTemplatePartialSpecializationDecl *PartialSpec =
dyn_cast<ClassTemplatePartialSpecializationDecl>(D)) {
// Import TemplateArgumentListInfo
TemplateArgumentListInfo ToTAInfo;
auto &ASTTemplateArgs = *PartialSpec->getTemplateArgsAsWritten();
for (unsigned I = 0, E = ASTTemplateArgs.NumTemplateArgs; I < E; ++I) {
bool Error = false;
auto ToLoc = ImportTemplateArgumentLoc(ASTTemplateArgs[I], Error);
if (Error)
return nullptr;
ToTAInfo.addArgument(ToLoc);
}
QualType CanonInjType = Importer.Import(
PartialSpec->getInjectedSpecializationType());
if (CanonInjType.isNull())
return nullptr;
CanonInjType = CanonInjType.getCanonicalType();
TemplateParameterList *ToTPList = ImportTemplateParameterList(
PartialSpec->getTemplateParameters());
if (!ToTPList && PartialSpec->getTemplateParameters())
return nullptr;
D2 = ClassTemplatePartialSpecializationDecl::Create(
Importer.getToContext(), D->getTagKind(), DC, StartLoc, IdLoc,
ToTPList, ClassTemplate,
llvm::makeArrayRef(TemplateArgs.data(), TemplateArgs.size()),
ToTAInfo, CanonInjType, nullptr);
} else {
D2 = ClassTemplateSpecializationDecl::Create(Importer.getToContext(),
D->getTagKind(), DC,
StartLoc, IdLoc,
ClassTemplate,
TemplateArgs,
/*PrevDecl=*/nullptr);
}
D2->setSpecializationKind(D->getSpecializationKind());
// Add this specialization to the class template.
ClassTemplate->AddSpecialization(D2, InsertPos);
// Import the qualifier, if any.
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
Importer.Imported(D, D2);
if (auto *TSI = D->getTypeAsWritten()) {
TypeSourceInfo *TInfo = Importer.Import(TSI);
if (!TInfo)
return nullptr;
D2->setTypeAsWritten(TInfo);
D2->setTemplateKeywordLoc(Importer.Import(D->getTemplateKeywordLoc()));
D2->setExternLoc(Importer.Import(D->getExternLoc()));
}
SourceLocation POI = Importer.Import(D->getPointOfInstantiation());
if (POI.isValid())
D2->setPointOfInstantiation(POI);
else if (D->getPointOfInstantiation().isValid())
return nullptr;
D2->setTemplateSpecializationKind(D->getTemplateSpecializationKind());
// Add the specialization to this context.
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
}
Importer.Imported(D, D2);
if (D->isCompleteDefinition() && ImportDefinition(D, D2))
return nullptr;
return D2;
}
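// Import a variable template, following the same pattern as class templates:
// map to an existing definition where possible, reuse a structurally
// matching template of the same name, or create a new VarTemplateDecl around
// the imported templated variable.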
Decl *ASTNodeImporter::VisitVarTemplateDecl(VarTemplateDecl *D) {
// If this variable has a definition in the translation unit we're coming
// from, but this particular declaration is not that definition, import the
// definition and map to that.
VarDecl *Definition =
cast_or_null<VarDecl>(D->getTemplatedDecl()->getDefinition());
if (Definition && Definition != D->getTemplatedDecl()) {
Decl *ImportedDef = Importer.Import(Definition->getDescribedVarTemplate());
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
// Import the major distinguishing characteristics of this variable template.
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;
if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;
if (ToD)
return ToD;
// We may already have a template of the same name; try to find and match it.
assert(!DC->isFunctionOrMethod() &&
"Variable templates cannot be declared at function scope");
SmallVector<NamedDecl *, 4> ConflictingDecls;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(Decl::IDNS_Ordinary))
continue;
Decl *Found = FoundDecls[I];
if (VarTemplateDecl *FoundTemplate = dyn_cast<VarTemplateDecl>(Found)) {
if (IsStructuralMatch(D, FoundTemplate)) {
// The variable templates structurally match; call it the same template.
Importer.Imported(D->getTemplatedDecl(),
FoundTemplate->getTemplatedDecl());
return Importer.Imported(D, FoundTemplate);
}
}
ConflictingDecls.push_back(FoundDecls[I]);
}
if (!ConflictingDecls.empty()) {
Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Ordinary,
ConflictingDecls.data(),
ConflictingDecls.size());
}
if (!Name)
return nullptr;
VarDecl *DTemplated = D->getTemplatedDecl();
// Import the type.
QualType T = Importer.Import(DTemplated->getType());
if (T.isNull())
return nullptr;
// Create the declaration that is being templated.
SourceLocation StartLoc = Importer.Import(DTemplated->getLocStart());
SourceLocation IdLoc = Importer.Import(DTemplated->getLocation());
TypeSourceInfo *TInfo = Importer.Import(DTemplated->getTypeSourceInfo());
VarDecl *D2Templated = VarDecl::Create(Importer.getToContext(), DC, StartLoc,
IdLoc, Name.getAsIdentifierInfo(), T,
TInfo, DTemplated->getStorageClass());
D2Templated->setAccess(DTemplated->getAccess());
D2Templated->setQualifierInfo(Importer.Import(DTemplated->getQualifierLoc()));
D2Templated->setLexicalDeclContext(LexicalDC);
// Importer.Imported(DTemplated, D2Templated);
// LexicalDC->addDeclInternal(D2Templated);
// Merge the initializer.
if (ImportDefinition(DTemplated, D2Templated))
return nullptr;
// Create the variable template declaration itself.
TemplateParameterList *TemplateParams =
ImportTemplateParameterList(D->getTemplateParameters());
if (!TemplateParams)
return nullptr;
VarTemplateDecl *D2 = VarTemplateDecl::Create(
Importer.getToContext(), DC, Loc, Name, TemplateParams, D2Templated);
D2Templated->setDescribedVarTemplate(D2);
D2->setAccess(D->getAccess());
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
// Note the relationship between the variable templates.
Importer.Imported(D, D2);
Importer.Imported(DTemplated, D2Templated);
if (DTemplated->isThisDeclarationADefinition() &&
!D2Templated->isThisDeclarationADefinition()) {
// FIXME: Import definition!
}
return D2;
}
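// Import a variable template specialization: reuse an existing
// specialization with matching template arguments where possible, otherwise
// create one and register it with the variable template.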
Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl(
VarTemplateSpecializationDecl *D) {
// If this variable has a definition in the translation unit we're coming from,
// but this particular declaration is not that definition, import the
// definition and map to that.
VarDecl *Definition = D->getDefinition();
if (Definition && Definition != D) {
Decl *ImportedDef = Importer.Import(Definition);
if (!ImportedDef)
return nullptr;
return Importer.Imported(D, ImportedDef);
}
VarTemplateDecl *VarTemplate = cast_or_null<VarTemplateDecl>(
Importer.Import(D->getSpecializedTemplate()));
if (!VarTemplate)
return nullptr;
// Import the context of this declaration.
DeclContext *DC = VarTemplate->getDeclContext();
if (!DC)
return nullptr;
DeclContext *LexicalDC = DC;
if (D->getDeclContext() != D->getLexicalDeclContext()) {
LexicalDC = Importer.ImportContext(D->getLexicalDeclContext());
if (!LexicalDC)
return nullptr;
}
// Import the location of this declaration.
SourceLocation StartLoc = Importer.Import(D->getLocStart());
SourceLocation IdLoc = Importer.Import(D->getLocation());
// Import template arguments.
SmallVector<TemplateArgument, 2> TemplateArgs;
if (ImportTemplateArguments(D->getTemplateArgs().data(),
D->getTemplateArgs().size(), TemplateArgs))
return nullptr;
// Try to find an existing specialization with these template arguments.
void *InsertPos = nullptr;
VarTemplateSpecializationDecl *D2 = VarTemplate->findSpecialization(
TemplateArgs, InsertPos);
if (D2) {
// We already have a variable template specialization with these template
// arguments.
// FIXME: Check for specialization vs. instantiation errors.
if (VarDecl *FoundDef = D2->getDefinition()) {
if (!D->isThisDeclarationADefinition() ||
IsStructuralMatch(D, FoundDef)) {
// The variables structurally match, or the "from" translation
// unit only had a forward declaration anyway; call it the same
// variable.
return Importer.Imported(D, FoundDef);
}
}
} else {
// Import the type.
QualType T = Importer.Import(D->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
// Create a new specialization.
D2 = VarTemplateSpecializationDecl::Create(
Importer.getToContext(), DC, StartLoc, IdLoc, VarTemplate, T, TInfo,
D->getStorageClass(), TemplateArgs);
D2->setSpecializationKind(D->getSpecializationKind());
D2->setTemplateArgsInfo(D->getTemplateArgsInfo());
// Add this specialization to the variable template.
VarTemplate->AddSpecialization(D2, InsertPos);
// Import the qualifier, if any.
D2->setQualifierInfo(Importer.Import(D->getQualifierLoc()));
// Add the specialization to this context.
D2->setLexicalDeclContext(LexicalDC);
LexicalDC->addDeclInternal(D2);
}
Importer.Imported(D, D2);
if (D->isThisDeclarationADefinition() && ImportDefinition(D, D2))
return nullptr;
return D2;
}
//----------------------------------------------------------------------------
// Import Statements
//----------------------------------------------------------------------------
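// A note on the error-handling idiom used by the visitors below:
// Importer.Import() returns null both on failure and when the source node
// itself was null, so checks of the form "if (!ToX && S->getX())"
// distinguish a genuine import failure from a legitimately absent child.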
DeclGroupRef ASTNodeImporter::ImportDeclGroup(DeclGroupRef DG) {
if (DG.isNull())
return DeclGroupRef::Create(Importer.getToContext(), nullptr, 0);
size_t NumDecls = DG.end() - DG.begin();
SmallVector<Decl *, 1> ToDecls(NumDecls);
auto &_Importer = this->Importer;
std::transform(DG.begin(), DG.end(), ToDecls.begin(),
[&_Importer](Decl *D) -> Decl * {
return _Importer.Import(D);
});
return DeclGroupRef::Create(Importer.getToContext(),
ToDecls.begin(),
NumDecls);
}
Stmt *ASTNodeImporter::VisitStmt(Stmt *S) {
Importer.FromDiag(S->getLocStart(), diag::err_unsupported_ast_node)
<< S->getStmtClassName();
return nullptr;
}
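// Import an inline-assembly statement. The symbolic operand names, the
// constraint and clobber string literals, and the operand expressions are
// imported in the order GCCAsmStmt expects: outputs first, then inputs.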
Stmt *ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
SmallVector<IdentifierInfo *, 4> Names;
for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) {
IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I));
// ToII is nullptr when no symbolic name is given for the output operand;
// see ParseStmtAsm::ParseAsmOperandsOpt.
if (!ToII && S->getOutputIdentifier(I))
return nullptr;
Names.push_back(ToII);
}
for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) {
IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I));
// ToII is nullptr when no symbolic name is given for the input operand;
// see ParseStmtAsm::ParseAsmOperandsOpt.
if (!ToII && S->getInputIdentifier(I))
return nullptr;
Names.push_back(ToII);
}
SmallVector<StringLiteral *, 4> Clobbers;
for (unsigned I = 0, E = S->getNumClobbers(); I != E; I++) {
StringLiteral *Clobber = cast_or_null<StringLiteral>(
Importer.Import(S->getClobberStringLiteral(I)));
if (!Clobber)
return nullptr;
Clobbers.push_back(Clobber);
}
SmallVector<StringLiteral *, 4> Constraints;
for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) {
StringLiteral *Output = cast_or_null<StringLiteral>(
Importer.Import(S->getOutputConstraintLiteral(I)));
if (!Output)
return nullptr;
Constraints.push_back(Output);
}
for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) {
StringLiteral *Input = cast_or_null<StringLiteral>(
Importer.Import(S->getInputConstraintLiteral(I)));
if (!Input)
return nullptr;
Constraints.push_back(Input);
}
SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs());
if (ImportContainerChecked(S->outputs(), Exprs))
return nullptr;
if (ImportArrayChecked(S->inputs(), Exprs.begin() + S->getNumOutputs()))
return nullptr;
StringLiteral *AsmStr = cast_or_null<StringLiteral>(
Importer.Import(S->getAsmString()));
if (!AsmStr)
return nullptr;
return new (Importer.getToContext()) GCCAsmStmt(
Importer.getToContext(),
Importer.Import(S->getAsmLoc()),
S->isSimple(),
S->isVolatile(),
S->getNumOutputs(),
S->getNumInputs(),
Names.data(),
Constraints.data(),
Exprs.data(),
AsmStr,
S->getNumClobbers(),
Clobbers.data(),
Importer.Import(S->getRParenLoc()));
}
Stmt *ASTNodeImporter::VisitDeclStmt(DeclStmt *S) {
DeclGroupRef ToDG = ImportDeclGroup(S->getDeclGroup());
for (Decl *ToD : ToDG) {
if (!ToD)
return nullptr;
}
SourceLocation ToStartLoc = Importer.Import(S->getStartLoc());
SourceLocation ToEndLoc = Importer.Import(S->getEndLoc());
return new (Importer.getToContext()) DeclStmt(ToDG, ToStartLoc, ToEndLoc);
}
Stmt *ASTNodeImporter::VisitNullStmt(NullStmt *S) {
SourceLocation ToSemiLoc = Importer.Import(S->getSemiLoc());
return new (Importer.getToContext()) NullStmt(ToSemiLoc,
S->hasLeadingEmptyMacro());
}
Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) {
llvm::SmallVector<Stmt *, 8> ToStmts(S->size());
if (ImportContainerChecked(S->body(), ToStmts))
return nullptr;
SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());
SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc());
return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(),
ToStmts,
ToLBraceLoc, ToRBraceLoc);
}
Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) {
Expr *ToLHS = Importer.Import(S->getLHS());
if (!ToLHS)
return nullptr;
Expr *ToRHS = Importer.Import(S->getRHS());
if (!ToRHS && S->getRHS())
return nullptr;
SourceLocation ToCaseLoc = Importer.Import(S->getCaseLoc());
SourceLocation ToEllipsisLoc = Importer.Import(S->getEllipsisLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
return new (Importer.getToContext()) CaseStmt(ToLHS, ToRHS,
ToCaseLoc, ToEllipsisLoc,
ToColonLoc);
}
Stmt *ASTNodeImporter::VisitDefaultStmt(DefaultStmt *S) {
SourceLocation ToDefaultLoc = Importer.Import(S->getDefaultLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return new (Importer.getToContext()) DefaultStmt(ToDefaultLoc, ToColonLoc,
ToSubStmt);
}
Stmt *ASTNodeImporter::VisitLabelStmt(LabelStmt *S) {
SourceLocation ToIdentLoc = Importer.Import(S->getIdentLoc());
LabelDecl *ToLabelDecl =
cast_or_null<LabelDecl>(Importer.Import(S->getDecl()));
if (!ToLabelDecl && S->getDecl())
return nullptr;
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return new (Importer.getToContext()) LabelStmt(ToIdentLoc, ToLabelDecl,
ToSubStmt);
}
Stmt *ASTNodeImporter::VisitAttributedStmt(AttributedStmt *S) {
SourceLocation ToAttrLoc = Importer.Import(S->getAttrLoc());
ArrayRef<const Attr*> FromAttrs(S->getAttrs());
SmallVector<const Attr *, 1> ToAttrs(FromAttrs.size());
ASTContext &_ToContext = Importer.getToContext();
std::transform(FromAttrs.begin(), FromAttrs.end(), ToAttrs.begin(),
[&_ToContext](const Attr *A) -> const Attr * {
return A->clone(_ToContext);
});
for (const Attr *ToA : ToAttrs) {
if (!ToA)
return nullptr;
}
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return AttributedStmt::Create(Importer.getToContext(), ToAttrLoc,
ToAttrs, ToSubStmt);
}
Stmt *ASTNodeImporter::VisitIfStmt(IfStmt *S) {
SourceLocation ToIfLoc = Importer.Import(S->getIfLoc());
Stmt *ToInit = Importer.Import(S->getInit());
if (!ToInit && S->getInit())
return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
Stmt *ToThenStmt = Importer.Import(S->getThen());
if (!ToThenStmt && S->getThen())
return nullptr;
SourceLocation ToElseLoc = Importer.Import(S->getElseLoc());
Stmt *ToElseStmt = Importer.Import(S->getElse());
if (!ToElseStmt && S->getElse())
return nullptr;
return new (Importer.getToContext()) IfStmt(Importer.getToContext(),
ToIfLoc, S->isConstexpr(),
ToInit,
ToConditionVariable,
ToCondition, ToThenStmt,
ToElseLoc, ToElseStmt);
}
Stmt *ASTNodeImporter::VisitSwitchStmt(SwitchStmt *S) {
Stmt *ToInit = Importer.Import(S->getInit());
if (!ToInit && S->getInit())
return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
SwitchStmt *ToStmt = new (Importer.getToContext()) SwitchStmt(
Importer.getToContext(), ToInit,
ToConditionVariable, ToCondition);
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
ToStmt->setBody(ToBody);
ToStmt->setSwitchLoc(Importer.Import(S->getSwitchLoc()));
// Now we have to re-chain the cases: a SwitchStmt threads its cases through
// a singly linked list (setNextSwitchCase), so the list is rebuilt here for
// the imported cases in their original order.
SwitchCase *LastChainedSwitchCase = nullptr;
for (SwitchCase *SC = S->getSwitchCaseList(); SC != nullptr;
SC = SC->getNextSwitchCase()) {
SwitchCase *ToSC = dyn_cast_or_null<SwitchCase>(Importer.Import(SC));
if (!ToSC)
return nullptr;
if (LastChainedSwitchCase)
LastChainedSwitchCase->setNextSwitchCase(ToSC);
else
ToStmt->setSwitchCaseList(ToSC);
LastChainedSwitchCase = ToSC;
}
return ToStmt;
}
Stmt *ASTNodeImporter::VisitWhileStmt(WhileStmt *S) {
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToWhileLoc = Importer.Import(S->getWhileLoc());
return new (Importer.getToContext()) WhileStmt(Importer.getToContext(),
ToConditionVariable,
ToCondition, ToBody,
ToWhileLoc);
}
Stmt *ASTNodeImporter::VisitDoStmt(DoStmt *S) {
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
SourceLocation ToDoLoc = Importer.Import(S->getDoLoc());
SourceLocation ToWhileLoc = Importer.Import(S->getWhileLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) DoStmt(ToBody, ToCondition,
ToDoLoc, ToWhileLoc,
ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitForStmt(ForStmt *S) {
Stmt *ToInit = Importer.Import(S->getInit());
if (!ToInit && S->getInit())
return nullptr;
Expr *ToCondition = Importer.Import(S->getCond());
if (!ToCondition && S->getCond())
return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
if (!ToConditionVariable)
return nullptr;
}
Expr *ToInc = Importer.Import(S->getInc());
if (!ToInc && S->getInc())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToForLoc = Importer.Import(S->getForLoc());
SourceLocation ToLParenLoc = Importer.Import(S->getLParenLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) ForStmt(Importer.getToContext(),
ToInit, ToCondition,
ToConditionVariable,
ToInc, ToBody,
ToForLoc, ToLParenLoc,
ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitGotoStmt(GotoStmt *S) {
LabelDecl *ToLabel = nullptr;
if (LabelDecl *FromLabel = S->getLabel()) {
ToLabel = dyn_cast_or_null<LabelDecl>(Importer.Import(FromLabel));
if (!ToLabel)
return nullptr;
}
SourceLocation ToGotoLoc = Importer.Import(S->getGotoLoc());
SourceLocation ToLabelLoc = Importer.Import(S->getLabelLoc());
return new (Importer.getToContext()) GotoStmt(ToLabel,
ToGotoLoc, ToLabelLoc);
}
Stmt *ASTNodeImporter::VisitIndirectGotoStmt(IndirectGotoStmt *S) {
SourceLocation ToGotoLoc = Importer.Import(S->getGotoLoc());
SourceLocation ToStarLoc = Importer.Import(S->getStarLoc());
Expr *ToTarget = Importer.Import(S->getTarget());
if (!ToTarget && S->getTarget())
return nullptr;
return new (Importer.getToContext()) IndirectGotoStmt(ToGotoLoc, ToStarLoc,
ToTarget);
}
Stmt *ASTNodeImporter::VisitContinueStmt(ContinueStmt *S) {
SourceLocation ToContinueLoc = Importer.Import(S->getContinueLoc());
return new (Importer.getToContext()) ContinueStmt(ToContinueLoc);
}
Stmt *ASTNodeImporter::VisitBreakStmt(BreakStmt *S) {
SourceLocation ToBreakLoc = Importer.Import(S->getBreakLoc());
return new (Importer.getToContext()) BreakStmt(ToBreakLoc);
}
Stmt *ASTNodeImporter::VisitReturnStmt(ReturnStmt *S) {
SourceLocation ToRetLoc = Importer.Import(S->getReturnLoc());
Expr *ToRetExpr = Importer.Import(S->getRetValue());
if (!ToRetExpr && S->getRetValue())
return nullptr;
VarDecl *NRVOCandidate = const_cast<VarDecl*>(S->getNRVOCandidate());
VarDecl *ToNRVOCandidate = cast_or_null<VarDecl>(Importer.Import(NRVOCandidate));
if (!ToNRVOCandidate && NRVOCandidate)
return nullptr;
return new (Importer.getToContext()) ReturnStmt(ToRetLoc, ToRetExpr,
ToNRVOCandidate);
}
Stmt *ASTNodeImporter::VisitCXXCatchStmt(CXXCatchStmt *S) {
SourceLocation ToCatchLoc = Importer.Import(S->getCatchLoc());
VarDecl *ToExceptionDecl = nullptr;
if (VarDecl *FromExceptionDecl = S->getExceptionDecl()) {
ToExceptionDecl =
dyn_cast_or_null<VarDecl>(Importer.Import(FromExceptionDecl));
if (!ToExceptionDecl)
return nullptr;
}
Stmt *ToHandlerBlock = Importer.Import(S->getHandlerBlock());
if (!ToHandlerBlock && S->getHandlerBlock())
return nullptr;
return new (Importer.getToContext()) CXXCatchStmt(ToCatchLoc,
ToExceptionDecl,
ToHandlerBlock);
}
Stmt *ASTNodeImporter::VisitCXXTryStmt(CXXTryStmt *S) {
SourceLocation ToTryLoc = Importer.Import(S->getTryLoc());
Stmt *ToTryBlock = Importer.Import(S->getTryBlock());
if (!ToTryBlock && S->getTryBlock())
return nullptr;
SmallVector<Stmt *, 1> ToHandlers(S->getNumHandlers());
for (unsigned HI = 0, HE = S->getNumHandlers(); HI != HE; ++HI) {
CXXCatchStmt *FromHandler = S->getHandler(HI);
if (Stmt *ToHandler = Importer.Import(FromHandler))
ToHandlers[HI] = ToHandler;
else
return nullptr;
}
return CXXTryStmt::Create(Importer.getToContext(), ToTryLoc, ToTryBlock,
ToHandlers);
}
Stmt *ASTNodeImporter::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
DeclStmt *ToRange =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getRangeStmt()));
if (!ToRange && S->getRangeStmt())
return nullptr;
DeclStmt *ToBegin =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getBeginStmt()));
if (!ToBegin && S->getBeginStmt())
return nullptr;
DeclStmt *ToEnd =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getEndStmt()));
if (!ToEnd && S->getEndStmt())
return nullptr;
Expr *ToCond = Importer.Import(S->getCond());
if (!ToCond && S->getCond())
return nullptr;
Expr *ToInc = Importer.Import(S->getInc());
if (!ToInc && S->getInc())
return nullptr;
DeclStmt *ToLoopVar =
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getLoopVarStmt()));
if (!ToLoopVar && S->getLoopVarStmt())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToForLoc = Importer.Import(S->getForLoc());
SourceLocation ToCoawaitLoc = Importer.Import(S->getCoawaitLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) CXXForRangeStmt(ToRange, ToBegin, ToEnd,
ToCond, ToInc,
ToLoopVar, ToBody,
ToForLoc, ToCoawaitLoc,
ToColonLoc, ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitObjCForCollectionStmt(ObjCForCollectionStmt *S) {
Stmt *ToElem = Importer.Import(S->getElement());
if (!ToElem && S->getElement())
return nullptr;
Expr *ToCollect = Importer.Import(S->getCollection());
if (!ToCollect && S->getCollection())
return nullptr;
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
SourceLocation ToForLoc = Importer.Import(S->getForLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
return new (Importer.getToContext()) ObjCForCollectionStmt(ToElem,
ToCollect,
ToBody, ToForLoc,
ToRParenLoc);
}
Stmt *ASTNodeImporter::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) {
SourceLocation ToAtCatchLoc = Importer.Import(S->getAtCatchLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
VarDecl *ToExceptionDecl = nullptr;
if (VarDecl *FromExceptionDecl = S->getCatchParamDecl()) {
ToExceptionDecl =
dyn_cast_or_null<VarDecl>(Importer.Import(FromExceptionDecl));
if (!ToExceptionDecl)
return nullptr;
}
Stmt *ToBody = Importer.Import(S->getCatchBody());
if (!ToBody && S->getCatchBody())
return nullptr;
return new (Importer.getToContext()) ObjCAtCatchStmt(ToAtCatchLoc,
ToRParenLoc,
ToExceptionDecl,
ToBody);
}
Stmt *ASTNodeImporter::VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
SourceLocation ToAtFinallyLoc = Importer.Import(S->getAtFinallyLoc());
Stmt *ToAtFinallyStmt = Importer.Import(S->getFinallyBody());
if (!ToAtFinallyStmt && S->getFinallyBody())
return nullptr;
return new (Importer.getToContext()) ObjCAtFinallyStmt(ToAtFinallyLoc,
ToAtFinallyStmt);
}
Stmt *ASTNodeImporter::VisitObjCAtTryStmt(ObjCAtTryStmt *S) {
SourceLocation ToAtTryLoc = Importer.Import(S->getAtTryLoc());
Stmt *ToAtTryStmt = Importer.Import(S->getTryBody());
if (!ToAtTryStmt && S->getTryBody())
return nullptr;
SmallVector<Stmt *, 1> ToCatchStmts(S->getNumCatchStmts());
for (unsigned CI = 0, CE = S->getNumCatchStmts(); CI != CE; ++CI) {
ObjCAtCatchStmt *FromCatchStmt = S->getCatchStmt(CI);
if (Stmt *ToCatchStmt = Importer.Import(FromCatchStmt))
ToCatchStmts[CI] = ToCatchStmt;
else
return nullptr;
}
Stmt *ToAtFinallyStmt = Importer.Import(S->getFinallyStmt());
if (!ToAtFinallyStmt && S->getFinallyStmt())
return nullptr;
return ObjCAtTryStmt::Create(Importer.getToContext(),
ToAtTryLoc, ToAtTryStmt,
ToCatchStmts.begin(), ToCatchStmts.size(),
ToAtFinallyStmt);
}
Stmt *ASTNodeImporter::VisitObjCAtSynchronizedStmt
(ObjCAtSynchronizedStmt *S) {
SourceLocation ToAtSynchronizedLoc =
Importer.Import(S->getAtSynchronizedLoc());
Expr *ToSynchExpr = Importer.Import(S->getSynchExpr());
if (!ToSynchExpr && S->getSynchExpr())
return nullptr;
Stmt *ToSynchBody = Importer.Import(S->getSynchBody());
if (!ToSynchBody && S->getSynchBody())
return nullptr;
return new (Importer.getToContext()) ObjCAtSynchronizedStmt(
ToAtSynchronizedLoc, ToSynchExpr, ToSynchBody);
}
Stmt *ASTNodeImporter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
SourceLocation ToAtThrowLoc = Importer.Import(S->getThrowLoc());
Expr *ToThrow = Importer.Import(S->getThrowExpr());
if (!ToThrow && S->getThrowExpr())
return nullptr;
return new (Importer.getToContext()) ObjCAtThrowStmt(ToAtThrowLoc, ToThrow);
}
Stmt *ASTNodeImporter::VisitObjCAutoreleasePoolStmt
(ObjCAutoreleasePoolStmt *S) {
SourceLocation ToAtLoc = Importer.Import(S->getAtLoc());
Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
if (!ToSubStmt && S->getSubStmt())
return nullptr;
return new (Importer.getToContext()) ObjCAutoreleasePoolStmt(ToAtLoc,
ToSubStmt);
}
//----------------------------------------------------------------------------
// Import Expressions
//----------------------------------------------------------------------------
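// VisitExpr is the fallback for expression kinds without a dedicated
// visitor: it emits an "unsupported AST node" diagnostic and fails the
// import.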
Expr *ASTNodeImporter::VisitExpr(Expr *E) {
Importer.FromDiag(E->getLocStart(), diag::err_unsupported_ast_node)
<< E->getStmtClassName();
return nullptr;
}
Expr *ASTNodeImporter::VisitVAArgExpr(VAArgExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr && E->getSubExpr())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(E->getWrittenTypeInfo());
if (!TInfo)
return nullptr;
return new (Importer.getToContext()) VAArgExpr(
Importer.Import(E->getBuiltinLoc()), SubExpr, TInfo,
Importer.Import(E->getRParenLoc()), T, E->isMicrosoftABI());
}
Expr *ASTNodeImporter::VisitGNUNullExpr(GNUNullExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext()) GNUNullExpr(
T, Importer.Import(E->getLocStart()));
}
Expr *ASTNodeImporter::VisitPredefinedExpr(PredefinedExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
StringLiteral *SL = cast_or_null<StringLiteral>(
Importer.Import(E->getFunctionName()));
if (!SL && E->getFunctionName())
return nullptr;
return new (Importer.getToContext()) PredefinedExpr(
Importer.Import(E->getLocStart()), T, E->getIdentType(), SL);
}
Expr *ASTNodeImporter::VisitDeclRefExpr(DeclRefExpr *E) {
ValueDecl *ToD = cast_or_null<ValueDecl>(Importer.Import(E->getDecl()));
if (!ToD)
return nullptr;
NamedDecl *FoundD = nullptr;
if (E->getDecl() != E->getFoundDecl()) {
FoundD = cast_or_null<NamedDecl>(Importer.Import(E->getFoundDecl()));
if (!FoundD)
return nullptr;
}
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TemplateArgumentListInfo ToTAInfo;
TemplateArgumentListInfo *ResInfo = nullptr;
if (E->hasExplicitTemplateArgs()) {
for (const auto &FromLoc : E->template_arguments()) {
bool Error = false;
TemplateArgumentLoc ToTALoc = ImportTemplateArgumentLoc(FromLoc, Error);
if (Error)
return nullptr;
ToTAInfo.addArgument(ToTALoc);
}
ResInfo = &ToTAInfo;
}
DeclRefExpr *DRE = DeclRefExpr::Create(Importer.getToContext(),
Importer.Import(E->getQualifierLoc()),
Importer.Import(E->getTemplateKeywordLoc()),
ToD,
E->refersToEnclosingVariableOrCapture(),
Importer.Import(E->getLocation()),
T, E->getValueKind(),
FoundD, ResInfo);
if (E->hadMultipleCandidates())
DRE->setHadMultipleCandidates(true);
return DRE;
}
Expr *ASTNodeImporter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext()) ImplicitValueInitExpr(T);
}
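// Import a single designator of a designated initializer. A field designator
// carries the field name plus dot and field locations; array and array-range
// designators carry the index of their first expression plus bracket (and,
// for ranges, ellipsis) locations.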
ASTNodeImporter::Designator
ASTNodeImporter::ImportDesignator(const Designator &D) {
if (D.isFieldDesignator()) {
IdentifierInfo *ToFieldName = Importer.Import(D.getFieldName());
// The caller checks for import errors.
return Designator(ToFieldName, Importer.Import(D.getDotLoc()),
Importer.Import(D.getFieldLoc()));
}
if (D.isArrayDesignator())
return Designator(D.getFirstExprIndex(),
Importer.Import(D.getLBracketLoc()),
Importer.Import(D.getRBracketLoc()));
assert(D.isArrayRangeDesignator());
return Designator(D.getFirstExprIndex(),
Importer.Import(D.getLBracketLoc()),
Importer.Import(D.getEllipsisLoc()),
Importer.Import(D.getRBracketLoc()));
}
Expr *ASTNodeImporter::VisitDesignatedInitExpr(DesignatedInitExpr *DIE) {
Expr *Init = cast_or_null<Expr>(Importer.Import(DIE->getInit()));
if (!Init)
return nullptr;
SmallVector<Expr *, 4> IndexExprs(DIE->getNumSubExprs() - 1);
// Copy the list elements starting from the second; the first sub-expression
// is Init itself, imported above.
for (unsigned I = 1, E = DIE->getNumSubExprs(); I < E; I++) {
if (Expr *Arg = cast_or_null<Expr>(Importer.Import(DIE->getSubExpr(I))))
IndexExprs[I - 1] = Arg;
else
return nullptr;
}
SmallVector<Designator, 4> Designators(DIE->size());
llvm::transform(DIE->designators(), Designators.begin(),
[this](const Designator &D) -> Designator {
return ImportDesignator(D);
});
for (const Designator &D : DIE->designators())
if (D.isFieldDesignator() && !D.getFieldName())
return nullptr;
return DesignatedInitExpr::Create(
Importer.getToContext(), Designators,
IndexExprs, Importer.Import(DIE->getEqualOrColonLoc()),
DIE->usesGNUSyntax(), Init);
}
Expr *ASTNodeImporter::VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext())
CXXNullPtrLiteralExpr(T, Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitIntegerLiteral(IntegerLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return IntegerLiteral::Create(Importer.getToContext(),
E->getValue(), T,
Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitFloatingLiteral(FloatingLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return FloatingLiteral::Create(Importer.getToContext(),
E->getValue(), E->isExact(), T,
Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitCharacterLiteral(CharacterLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
E->getKind(), T,
Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitStringLiteral(StringLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
SmallVector<SourceLocation, 4> Locations(E->getNumConcatenated());
ImportArray(E->tokloc_begin(), E->tokloc_end(), Locations.begin());
return StringLiteral::Create(Importer.getToContext(), E->getBytes(),
E->getKind(), E->isPascal(), T,
Locations.data(), Locations.size());
}
Expr *ASTNodeImporter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(E->getTypeSourceInfo());
if (!TInfo)
return nullptr;
Expr *Init = Importer.Import(E->getInitializer());
if (!Init)
return nullptr;
return new (Importer.getToContext()) CompoundLiteralExpr(
Importer.Import(E->getLParenLoc()), TInfo, T, E->getValueKind(),
Init, E->isFileScope());
}
Expr *ASTNodeImporter::VisitAtomicExpr(AtomicExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
SmallVector<Expr *, 6> Exprs(E->getNumSubExprs());
if (ImportArrayChecked(
E->getSubExprs(), E->getSubExprs() + E->getNumSubExprs(),
Exprs.begin()))
return nullptr;
return new (Importer.getToContext()) AtomicExpr(
Importer.Import(E->getBuiltinLoc()), Exprs, T, E->getOp(),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitAddrLabelExpr(AddrLabelExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
LabelDecl *ToLabel = cast_or_null<LabelDecl>(Importer.Import(E->getLabel()));
if (!ToLabel)
return nullptr;
return new (Importer.getToContext()) AddrLabelExpr(
Importer.Import(E->getAmpAmpLoc()), Importer.Import(E->getLabelLoc()),
ToLabel, T);
}
Expr *ASTNodeImporter::VisitParenExpr(ParenExpr *E) {
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
return new (Importer.getToContext())
ParenExpr(Importer.Import(E->getLParen()),
Importer.Import(E->getRParen()),
SubExpr);
}
Expr *ASTNodeImporter::VisitParenListExpr(ParenListExpr *E) {
SmallVector<Expr *, 4> Exprs(E->getNumExprs());
if (ImportContainerChecked(E->exprs(), Exprs))
return nullptr;
return new (Importer.getToContext()) ParenListExpr(
Importer.getToContext(), Importer.Import(E->getLParenLoc()),
Exprs, Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitStmtExpr(StmtExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
CompoundStmt *ToSubStmt = cast_or_null<CompoundStmt>(
Importer.Import(E->getSubStmt()));
if (!ToSubStmt && E->getSubStmt())
return nullptr;
return new (Importer.getToContext()) StmtExpr(ToSubStmt, T,
Importer.Import(E->getLParenLoc()), Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitUnaryOperator(UnaryOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
return new (Importer.getToContext()) UnaryOperator(SubExpr, E->getOpcode(),
T, E->getValueKind(),
E->getObjectKind(),
Importer.Import(E->getOperatorLoc()));
}
Expr *ASTNodeImporter::VisitUnaryExprOrTypeTraitExpr(
UnaryExprOrTypeTraitExpr *E) {
QualType ResultType = Importer.Import(E->getType());
if (E->isArgumentType()) {
TypeSourceInfo *TInfo = Importer.Import(E->getArgumentTypeInfo());
if (!TInfo)
return nullptr;
return new (Importer.getToContext()) UnaryExprOrTypeTraitExpr(E->getKind(),
TInfo, ResultType,
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getRParenLoc()));
}
Expr *SubExpr = Importer.Import(E->getArgumentExpr());
if (!SubExpr)
return nullptr;
return new (Importer.getToContext()) UnaryExprOrTypeTraitExpr(E->getKind(),
SubExpr, ResultType,
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitBinaryOperator(BinaryOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *LHS = Importer.Import(E->getLHS());
if (!LHS)
return nullptr;
Expr *RHS = Importer.Import(E->getRHS());
if (!RHS)
return nullptr;
return new (Importer.getToContext()) BinaryOperator(LHS, RHS, E->getOpcode(),
T, E->getValueKind(),
E->getObjectKind(),
Importer.Import(E->getOperatorLoc()),
E->getFPFeatures());
}
Expr *ASTNodeImporter::VisitConditionalOperator(ConditionalOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToLHS = Importer.Import(E->getLHS());
if (!ToLHS)
return nullptr;
Expr *ToRHS = Importer.Import(E->getRHS());
if (!ToRHS)
return nullptr;
Expr *ToCond = Importer.Import(E->getCond());
if (!ToCond)
return nullptr;
return new (Importer.getToContext()) ConditionalOperator(
ToCond, Importer.Import(E->getQuestionLoc()),
ToLHS, Importer.Import(E->getColonLoc()),
ToRHS, T, E->getValueKind(), E->getObjectKind());
}
Expr *ASTNodeImporter::VisitBinaryConditionalOperator(
BinaryConditionalOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *Common = Importer.Import(E->getCommon());
if (!Common)
return nullptr;
Expr *Cond = Importer.Import(E->getCond());
if (!Cond)
return nullptr;
OpaqueValueExpr *OpaqueValue = cast_or_null<OpaqueValueExpr>(
Importer.Import(E->getOpaqueValue()));
if (!OpaqueValue)
return nullptr;
Expr *TrueExpr = Importer.Import(E->getTrueExpr());
if (!TrueExpr)
return nullptr;
Expr *FalseExpr = Importer.Import(E->getFalseExpr());
if (!FalseExpr)
return nullptr;
return new (Importer.getToContext()) BinaryConditionalOperator(
Common, OpaqueValue, Cond, TrueExpr, FalseExpr,
Importer.Import(E->getQuestionLoc()), Importer.Import(E->getColonLoc()),
T, E->getValueKind(), E->getObjectKind());
}
Expr *ASTNodeImporter::VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *ToQueried = Importer.Import(E->getQueriedTypeSourceInfo());
if (!ToQueried)
return nullptr;
Expr *Dim = Importer.Import(E->getDimensionExpression());
if (!Dim && E->getDimensionExpression())
return nullptr;
return new (Importer.getToContext()) ArrayTypeTraitExpr(
Importer.Import(E->getLocStart()), E->getTrait(), ToQueried,
E->getValue(), Dim, Importer.Import(E->getLocEnd()), T);
}
Expr *ASTNodeImporter::VisitExpressionTraitExpr(ExpressionTraitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToQueried = Importer.Import(E->getQueriedExpression());
if (!ToQueried)
return nullptr;
return new (Importer.getToContext()) ExpressionTraitExpr(
Importer.Import(E->getLocStart()), E->getTrait(), ToQueried,
E->getValue(), Importer.Import(E->getLocEnd()), T);
}
Expr *ASTNodeImporter::VisitOpaqueValueExpr(OpaqueValueExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SourceExpr = Importer.Import(E->getSourceExpr());
if (!SourceExpr && E->getSourceExpr())
return nullptr;
return new (Importer.getToContext()) OpaqueValueExpr(
Importer.Import(E->getLocation()), T, E->getValueKind(),
E->getObjectKind(), SourceExpr);
}
Expr *ASTNodeImporter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToLHS = Importer.Import(E->getLHS());
if (!ToLHS)
return nullptr;
Expr *ToRHS = Importer.Import(E->getRHS());
if (!ToRHS)
return nullptr;
return new (Importer.getToContext()) ArraySubscriptExpr(
ToLHS, ToRHS, T, E->getValueKind(), E->getObjectKind(),
Importer.Import(E->getRBracketLoc()));
}
Expr *ASTNodeImporter::VisitCompoundAssignOperator(CompoundAssignOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
QualType CompLHSType = Importer.Import(E->getComputationLHSType());
if (CompLHSType.isNull())
return nullptr;
QualType CompResultType = Importer.Import(E->getComputationResultType());
if (CompResultType.isNull())
return nullptr;
Expr *LHS = Importer.Import(E->getLHS());
if (!LHS)
return nullptr;
Expr *RHS = Importer.Import(E->getRHS());
if (!RHS)
return nullptr;
return new (Importer.getToContext())
CompoundAssignOperator(LHS, RHS, E->getOpcode(),
T, E->getValueKind(),
E->getObjectKind(),
CompLHSType, CompResultType,
Importer.Import(E->getOperatorLoc()),
E->getFPFeatures());
}
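// Import the chain of CXXBaseSpecifiers that a cast traverses. Returns true
// on error, following the convention of the other Import* helpers that
// signal failure through a bool result.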
bool ASTNodeImporter::ImportCastPath(CastExpr *CE, CXXCastPath &Path) {
for (auto I = CE->path_begin(), E = CE->path_end(); I != E; ++I) {
if (CXXBaseSpecifier *Spec = Importer.Import(*I))
Path.push_back(Spec);
else
return true;
}
return false;
}
Expr *ASTNodeImporter::VisitImplicitCastExpr(ImplicitCastExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
CXXCastPath BasePath;
if (ImportCastPath(E, BasePath))
return nullptr;
return ImplicitCastExpr::Create(Importer.getToContext(), T, E->getCastKind(),
SubExpr, &BasePath, E->getValueKind());
}
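// Import an explicit cast. The node to create depends on the concrete
// statement class: C-style, functional, and Objective-C bridged casts are
// built directly in the first switch; the C++ named casts share the operator
// and bracket locations imported after it.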
Expr *ASTNodeImporter::VisitExplicitCastExpr(ExplicitCastExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(E->getTypeInfoAsWritten());
if (!TInfo && E->getTypeInfoAsWritten())
return nullptr;
CXXCastPath BasePath;
if (ImportCastPath(E, BasePath))
return nullptr;
switch (E->getStmtClass()) {
case Stmt::CStyleCastExprClass: {
CStyleCastExpr *CCE = cast<CStyleCastExpr>(E);
return CStyleCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
Importer.Import(CCE->getLParenLoc()),
Importer.Import(CCE->getRParenLoc()));
}
case Stmt::CXXFunctionalCastExprClass: {
CXXFunctionalCastExpr *FCE = cast<CXXFunctionalCastExpr>(E);
return CXXFunctionalCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), TInfo,
E->getCastKind(), SubExpr, &BasePath,
Importer.Import(FCE->getLParenLoc()),
Importer.Import(FCE->getRParenLoc()));
}
case Stmt::ObjCBridgedCastExprClass: {
ObjCBridgedCastExpr *OCE = cast<ObjCBridgedCastExpr>(E);
return new (Importer.getToContext()) ObjCBridgedCastExpr(
Importer.Import(OCE->getLParenLoc()), OCE->getBridgeKind(),
E->getCastKind(), Importer.Import(OCE->getBridgeKeywordLoc()),
TInfo, SubExpr);
}
default:
break; // Handled below as a C++ named cast.
}
CXXNamedCastExpr *Named = cast<CXXNamedCastExpr>(E);
SourceLocation ExprLoc = Importer.Import(Named->getOperatorLoc()),
RParenLoc = Importer.Import(Named->getRParenLoc());
SourceRange Brackets = Importer.Import(Named->getAngleBrackets());
switch (E->getStmtClass()) {
case Stmt::CXXStaticCastExprClass:
return CXXStaticCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
ExprLoc, RParenLoc, Brackets);
case Stmt::CXXDynamicCastExprClass:
return CXXDynamicCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
ExprLoc, RParenLoc, Brackets);
case Stmt::CXXReinterpretCastExprClass:
return CXXReinterpretCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), E->getCastKind(),
SubExpr, &BasePath, TInfo,
ExprLoc, RParenLoc, Brackets);
case Stmt::CXXConstCastExprClass:
return CXXConstCastExpr::Create(Importer.getToContext(), T,
E->getValueKind(), SubExpr, TInfo, ExprLoc,
RParenLoc, Brackets);
default:
llvm_unreachable("Cast expression of unsupported type!");
return nullptr;
}
}
Expr *ASTNodeImporter::VisitOffsetOfExpr(OffsetOfExpr *OE) {
QualType T = Importer.Import(OE->getType());
if (T.isNull())
return nullptr;
SmallVector<OffsetOfNode, 4> Nodes;
for (int I = 0, E = OE->getNumComponents(); I < E; ++I) {
const OffsetOfNode &Node = OE->getComponent(I);
switch (Node.getKind()) {
case OffsetOfNode::Array:
Nodes.push_back(OffsetOfNode(Importer.Import(Node.getLocStart()),
Node.getArrayExprIndex(),
Importer.Import(Node.getLocEnd())));
break;
case OffsetOfNode::Base: {
CXXBaseSpecifier *BS = Importer.Import(Node.getBase());
if (!BS && Node.getBase())
return nullptr;
Nodes.push_back(OffsetOfNode(BS));
break;
}
case OffsetOfNode::Field: {
FieldDecl *FD = cast_or_null<FieldDecl>(Importer.Import(Node.getField()));
if (!FD)
return nullptr;
Nodes.push_back(OffsetOfNode(Importer.Import(Node.getLocStart()), FD,
Importer.Import(Node.getLocEnd())));
break;
}
case OffsetOfNode::Identifier: {
IdentifierInfo *ToII = Importer.Import(Node.getFieldName());
if (!ToII)
return nullptr;
Nodes.push_back(OffsetOfNode(Importer.Import(Node.getLocStart()), ToII,
Importer.Import(Node.getLocEnd())));
break;
}
}
}
SmallVector<Expr *, 4> Exprs(OE->getNumExpressions());
for (int I = 0, E = OE->getNumExpressions(); I < E; ++I) {
Expr *ToIndexExpr = Importer.Import(OE->getIndexExpr(I));
if (!ToIndexExpr)
return nullptr;
Exprs[I] = ToIndexExpr;
}
TypeSourceInfo *TInfo = Importer.Import(OE->getTypeSourceInfo());
if (!TInfo && OE->getTypeSourceInfo())
return nullptr;
return OffsetOfExpr::Create(Importer.getToContext(), T,
Importer.Import(OE->getOperatorLoc()),
TInfo, Nodes, Exprs,
Importer.Import(OE->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitCXXNoexceptExpr(CXXNoexceptExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *Operand = Importer.Import(E->getOperand());
if (!Operand)
return nullptr;
CanThrowResult CanThrow;
if (E->isValueDependent())
CanThrow = CT_Dependent;
else
CanThrow = E->getValue() ? CT_Can : CT_Cannot;
return new (Importer.getToContext()) CXXNoexceptExpr(
T, Operand, CanThrow,
Importer.Import(E->getLocStart()), Importer.Import(E->getLocEnd()));
}
Expr *ASTNodeImporter::VisitCXXThrowExpr(CXXThrowExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr && E->getSubExpr())
return nullptr;
return new (Importer.getToContext()) CXXThrowExpr(
SubExpr, T, Importer.Import(E->getThrowLoc()),
E->isThrownVariableInScope());
}
Expr *ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) {
ParmVarDecl *Param = cast_or_null<ParmVarDecl>(
Importer.Import(E->getParam()));
if (!Param)
return nullptr;
return CXXDefaultArgExpr::Create(
Importer.getToContext(), Importer.Import(E->getUsedLocation()), Param);
}
Expr *ASTNodeImporter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
TypeSourceInfo *TypeInfo = Importer.Import(E->getTypeSourceInfo());
if (!TypeInfo)
return nullptr;
return new (Importer.getToContext()) CXXScalarValueInitExpr(
T, TypeInfo, Importer.Import(E->getRParenLoc()));
}
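// Import a bind-temporary expression. The temporary's destructor is imported
// first so that an equivalent CXXTemporary can be created in the destination
// context.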
Expr *ASTNodeImporter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
return nullptr;
auto *Dtor = cast_or_null<CXXDestructorDecl>(
Importer.Import(const_cast<CXXDestructorDecl *>(
E->getTemporary()->getDestructor())));
if (!Dtor)
return nullptr;
ASTContext &ToCtx = Importer.getToContext();
CXXTemporary *Temp = CXXTemporary::Create(ToCtx, Dtor);
return CXXBindTemporaryExpr::Create(ToCtx, Temp, SubExpr);
}
Expr *ASTNodeImporter::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *CE) {
QualType T = Importer.Import(CE->getType());
if (T.isNull())
return nullptr;
SmallVector<Expr *, 8> Args(CE->getNumArgs());
if (ImportContainerChecked(CE->arguments(), Args))
return nullptr;
auto *Ctor = cast_or_null<CXXConstructorDecl>(
Importer.Import(CE->getConstructor()));
if (!Ctor)
return nullptr;
return CXXTemporaryObjectExpr::Create(
Importer.getToContext(), T,
Importer.Import(CE->getLocStart()),
Ctor,
CE->isElidable(),
Args,
CE->hadMultipleCandidates(),
CE->isListInitialization(),
CE->isStdInitListInitialization(),
CE->requiresZeroInitialization(),
CE->getConstructionKind(),
Importer.Import(CE->getParenOrBraceRange()));
}
Expr *
ASTNodeImporter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *TempE = Importer.Import(E->GetTemporaryExpr());
if (!TempE)
return nullptr;
ValueDecl *ExtendedBy = cast_or_null<ValueDecl>(
Importer.Import(const_cast<ValueDecl *>(E->getExtendingDecl())));
if (!ExtendedBy && E->getExtendingDecl())
return nullptr;
auto *ToMTE = new (Importer.getToContext()) MaterializeTemporaryExpr(
T, TempE, E->isBoundToLvalueReference());
// FIXME: Should ManglingNumber get numbers associated with 'to' context?
ToMTE->setExtendingDecl(ExtendedBy, E->getManglingNumber());
return ToMTE;
}
Expr *ASTNodeImporter::VisitCXXNewExpr(CXXNewExpr *CE) {
QualType T = Importer.Import(CE->getType());
if (T.isNull())
return nullptr;
SmallVector<Expr *, 4> PlacementArgs(CE->getNumPlacementArgs());
if (ImportContainerChecked(CE->placement_arguments(), PlacementArgs))
return nullptr;
FunctionDecl *OperatorNewDecl = cast_or_null<FunctionDecl>(
Importer.Import(CE->getOperatorNew()));
if (!OperatorNewDecl && CE->getOperatorNew())
return nullptr;
FunctionDecl *OperatorDeleteDecl = cast_or_null<FunctionDecl>(
Importer.Import(CE->getOperatorDelete()));
if (!OperatorDeleteDecl && CE->getOperatorDelete())
return nullptr;
Expr *ToInit = Importer.Import(CE->getInitializer());
if (!ToInit && CE->getInitializer())
return nullptr;
TypeSourceInfo *TInfo = Importer.Import(CE->getAllocatedTypeSourceInfo());
if (!TInfo)
return nullptr;
Expr *ToArrSize = Importer.Import(CE->getArraySize());
if (!ToArrSize && CE->getArraySize())
return nullptr;
return new (Importer.getToContext()) CXXNewExpr(
Importer.getToContext(),
CE->isGlobalNew(),
OperatorNewDecl, OperatorDeleteDecl,
CE->passAlignment(),
CE->doesUsualArrayDeleteWantSize(),
PlacementArgs,
Importer.Import(CE->getTypeIdParens()),
ToArrSize, CE->getInitializationStyle(), ToInit, T, TInfo,
Importer.Import(CE->getSourceRange()),
Importer.Import(CE->getDirectInitRange()));
}
Expr *ASTNodeImporter::VisitCXXDeleteExpr(CXXDeleteExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
FunctionDecl *OperatorDeleteDecl = cast_or_null<FunctionDecl>(
Importer.Import(E->getOperatorDelete()));
if (!OperatorDeleteDecl && E->getOperatorDelete())
return nullptr;
Expr *ToArg = Importer.Import(E->getArgument());
if (!ToArg && E->getArgument())
return nullptr;
return new (Importer.getToContext()) CXXDeleteExpr(
T, E->isGlobalDelete(),
E->isArrayForm(),
E->isArrayFormAsWritten(),
E->doesUsualArrayDeleteWantSize(),
OperatorDeleteDecl,
ToArg,
Importer.Import(E->getLocStart()));
}
Expr *ASTNodeImporter::VisitCXXConstructExpr(CXXConstructExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
CXXConstructorDecl *ToCCD =
dyn_cast_or_null<CXXConstructorDecl>(Importer.Import(E->getConstructor()));
if (!ToCCD)
return nullptr;
SmallVector<Expr *, 6> ToArgs(E->getNumArgs());
if (ImportContainerChecked(E->arguments(), ToArgs))
return nullptr;
return CXXConstructExpr::Create(Importer.getToContext(), T,
Importer.Import(E->getLocation()),
ToCCD, E->isElidable(),
ToArgs, E->hadMultipleCandidates(),
E->isListInitialization(),
E->isStdInitListInitialization(),
E->requiresZeroInitialization(),
E->getConstructionKind(),
Importer.Import(E->getParenOrBraceRange()));
}
Expr *ASTNodeImporter::VisitExprWithCleanups(ExprWithCleanups *EWC) {
Expr *SubExpr = Importer.Import(EWC->getSubExpr());
if (!SubExpr && EWC->getSubExpr())
return nullptr;
SmallVector<ExprWithCleanups::CleanupObject, 8> Objs(EWC->getNumObjects());
for (unsigned I = 0, E = EWC->getNumObjects(); I < E; I++)
if (ExprWithCleanups::CleanupObject Obj =
cast_or_null<BlockDecl>(Importer.Import(EWC->getObject(I))))
Objs[I] = Obj;
else
return nullptr;
return ExprWithCleanups::Create(Importer.getToContext(),
SubExpr, EWC->cleanupsHaveSideEffects(),
Objs);
}
Expr *ASTNodeImporter::VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToFn = Importer.Import(E->getCallee());
if (!ToFn)
return nullptr;
SmallVector<Expr *, 4> ToArgs(E->getNumArgs());
if (ImportContainerChecked(E->arguments(), ToArgs))
return nullptr;
return new (Importer.getToContext()) CXXMemberCallExpr(
Importer.getToContext(), ToFn, ToArgs, T, E->getValueKind(),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitCXXThisExpr(CXXThisExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext())
CXXThisExpr(Importer.Import(E->getLocation()), T, E->isImplicit());
}
Expr *ASTNodeImporter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
return new (Importer.getToContext())
CXXBoolLiteralExpr(E->getValue(), T, Importer.Import(E->getLocation()));
}
Expr *ASTNodeImporter::VisitMemberExpr(MemberExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToBase = Importer.Import(E->getBase());
if (!ToBase && E->getBase())
return nullptr;
ValueDecl *ToMember = dyn_cast_or_null<ValueDecl>(Importer.Import(E->getMemberDecl()));
if (!ToMember && E->getMemberDecl())
return nullptr;
DeclAccessPair ToFoundDecl = DeclAccessPair::make(
dyn_cast_or_null<NamedDecl>(Importer.Import(E->getFoundDecl().getDecl())),
E->getFoundDecl().getAccess());
DeclarationNameInfo ToMemberNameInfo(
Importer.Import(E->getMemberNameInfo().getName()),
Importer.Import(E->getMemberNameInfo().getLoc()));
if (E->hasExplicitTemplateArgs()) {
return nullptr; // FIXME: handle template arguments
}
return MemberExpr::Create(Importer.getToContext(), ToBase,
E->isArrow(),
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getQualifierLoc()),
Importer.Import(E->getTemplateKeywordLoc()),
ToMember, ToFoundDecl, ToMemberNameInfo,
nullptr, T, E->getValueKind(),
E->getObjectKind());
}
Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
Expr *ToCallee = Importer.Import(E->getCallee());
if (!ToCallee && E->getCallee())
return nullptr;
unsigned NumArgs = E->getNumArgs();
llvm::SmallVector<Expr *, 2> ToArgs(NumArgs);
for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai) {
Expr *FromArg = E->getArg(ai);
Expr *ToArg = Importer.Import(FromArg);
if (!ToArg)
return nullptr;
ToArgs[ai] = ToArg;
}
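// Stage the imported argument pointers in an array allocated within the
// destination ASTContext before constructing the CallExpr.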
Expr **ToArgs_Copied = new (Importer.getToContext())
Expr*[NumArgs];
for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai)
ToArgs_Copied[ai] = ToArgs[ai];
return new (Importer.getToContext())
CallExpr(Importer.getToContext(), ToCallee,
llvm::makeArrayRef(ToArgs_Copied, NumArgs), T, E->getValueKind(),
Importer.Import(E->getRParenLoc()));
}
Expr *ASTNodeImporter::VisitInitListExpr(InitListExpr *ILE) {
QualType T = Importer.Import(ILE->getType());
if (T.isNull())
return nullptr;
llvm::SmallVector<Expr *, 4> Exprs(ILE->getNumInits());
if (ImportContainerChecked(ILE->inits(), Exprs))
return nullptr;
ASTContext &ToCtx = Importer.getToContext();
InitListExpr *To = new (ToCtx) InitListExpr(
ToCtx, Importer.Import(ILE->getLBraceLoc()),
Exprs, Importer.Import(ILE->getRBraceLoc()));
To->setType(T);
if (ILE->hasArrayFiller()) {
Expr *Filler = Importer.Import(ILE->getArrayFiller());
if (!Filler)
return nullptr;
To->setArrayFiller(Filler);
}
if (FieldDecl *FromFD = ILE->getInitializedFieldInUnion()) {
FieldDecl *ToFD = cast_or_null<FieldDecl>(Importer.Import(FromFD));
if (!ToFD)
return nullptr;
To->setInitializedFieldInUnion(ToFD);
}
if (InitListExpr *SyntForm = ILE->getSyntacticForm()) {
InitListExpr *ToSyntForm = cast_or_null<InitListExpr>(
Importer.Import(SyntForm));
if (!ToSyntForm)
return nullptr;
To->setSyntacticForm(ToSyntForm);
}
To->sawArrayRangeDesignator(ILE->hadArrayRangeDesignator());
To->setValueDependent(ILE->isValueDependent());
To->setInstantiationDependent(ILE->isInstantiationDependent());
return To;
}
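// Illustrative note (not part of the upstream file): an InitListExpr can
// carry both a semantic and a syntactic form. For example,
//
//   int a[4] = {1, 2};
//
// gains an array filler for the trailing elements in its semantic form. The
// code above therefore imports the filler, the initialized union field, and
// the syntactic form separately, so both views of the initializer survive.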
Expr *ASTNodeImporter::VisitArrayInitLoopExpr(ArrayInitLoopExpr *E) {
QualType ToType = Importer.Import(E->getType());
if (ToType.isNull())
return nullptr;
Expr *ToCommon = Importer.Import(E->getCommonExpr());
if (!ToCommon && E->getCommonExpr())
return nullptr;
Expr *ToSubExpr = Importer.Import(E->getSubExpr());
if (!ToSubExpr && E->getSubExpr())
return nullptr;
return new (Importer.getToContext())
ArrayInitLoopExpr(ToType, ToCommon, ToSubExpr);
}
Expr *ASTNodeImporter::VisitArrayInitIndexExpr(ArrayInitIndexExpr *E) {
QualType ToType = Importer.Import(E->getType());
if (ToType.isNull())
return nullptr;
return new (Importer.getToContext()) ArrayInitIndexExpr(ToType);
}
Expr *ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) {
FieldDecl *ToField = llvm::dyn_cast_or_null<FieldDecl>(
Importer.Import(DIE->getField()));
if (!ToField && DIE->getField())
return nullptr;
return CXXDefaultInitExpr::Create(
Importer.getToContext(), Importer.Import(DIE->getLocStart()), ToField);
}
Expr *ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) {
QualType ToType = Importer.Import(E->getType());
if (ToType.isNull() && !E->getType().isNull())
return nullptr;
ExprValueKind VK = E->getValueKind();
CastKind CK = E->getCastKind();
Expr *ToOp = Importer.Import(E->getSubExpr());
if (!ToOp && E->getSubExpr())
return nullptr;
CXXCastPath BasePath;
if (ImportCastPath(E, BasePath))
return nullptr;
TypeSourceInfo *ToWritten = Importer.Import(E->getTypeInfoAsWritten());
SourceLocation ToOperatorLoc = Importer.Import(E->getOperatorLoc());
SourceLocation ToRParenLoc = Importer.Import(E->getRParenLoc());
SourceRange ToAngleBrackets = Importer.Import(E->getAngleBrackets());
if (isa<CXXStaticCastExpr>(E)) {
return CXXStaticCastExpr::Create(
Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
} else if (isa<CXXDynamicCastExpr>(E)) {
return CXXDynamicCastExpr::Create(
Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
} else if (isa<CXXReinterpretCastExpr>(E)) {
return CXXReinterpretCastExpr::Create(
Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
} else {
return nullptr;
}
}
Expr *ASTNodeImporter::VisitSubstNonTypeTemplateParmExpr(
SubstNonTypeTemplateParmExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
return nullptr;
NonTypeTemplateParmDecl *Param = cast_or_null<NonTypeTemplateParmDecl>(
Importer.Import(E->getParameter()));
if (!Param)
return nullptr;
Expr *Replacement = Importer.Import(E->getReplacement());
if (!Replacement)
return nullptr;
return new (Importer.getToContext()) SubstNonTypeTemplateParmExpr(
T, E->getValueKind(), Importer.Import(E->getExprLoc()), Param,
Replacement);
}
void ASTNodeImporter::ImportOverrides(CXXMethodDecl *ToMethod,
CXXMethodDecl *FromMethod) {
for (auto *FromOverriddenMethod : FromMethod->overridden_methods())
ToMethod->addOverriddenMethod(
cast<CXXMethodDecl>(Importer.Import(const_cast<CXXMethodDecl*>(
FromOverriddenMethod))));
}
ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager,
ASTContext &FromContext, FileManager &FromFileManager,
bool MinimalImport)
: ToContext(ToContext), FromContext(FromContext),
ToFileManager(ToFileManager), FromFileManager(FromFileManager),
Minimal(MinimalImport), LastDiagFromFrom(false)
{
ImportedDecls[FromContext.getTranslationUnitDecl()]
= ToContext.getTranslationUnitDecl();
}
ASTImporter::~ASTImporter() { }
QualType ASTImporter::Import(QualType FromT) {
if (FromT.isNull())
return QualType();
const Type *fromTy = FromT.getTypePtr();
// Check whether we've already imported this type.
llvm::DenseMap<const Type *, const Type *>::iterator Pos
= ImportedTypes.find(fromTy);
if (Pos != ImportedTypes.end())
return ToContext.getQualifiedType(Pos->second, FromT.getLocalQualifiers());
// Import the type
ASTNodeImporter Importer(*this);
QualType ToT = Importer.Visit(fromTy);
if (ToT.isNull())
return ToT;
// Record the imported type.
ImportedTypes[fromTy] = ToT.getTypePtr();
return ToContext.getQualifiedType(ToT, FromT.getLocalQualifiers());
}
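// Illustrative sketch (hypothetical variables, not part of the upstream
// file): ImportedTypes memoizes per unqualified Type*, and local qualifiers
// are re-applied on every call, so
//
//   QualType A = Importer.Import(FromT);             // performs the import
//   QualType B = Importer.Import(FromT.withConst()); // cache hit, adds const
//
// leaves A and B sharing a single to-context Type* when the import succeeds.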
TypeSourceInfo *ASTImporter::Import(TypeSourceInfo *FromTSI) {
if (!FromTSI)
return FromTSI;
// FIXME: For now we just create a "trivial" type source info based
// on the type and a single location. Implement a real version of this.
QualType T = Import(FromTSI->getType());
if (T.isNull())
return nullptr;
return ToContext.getTrivialTypeSourceInfo(T,
Import(FromTSI->getTypeLoc().getLocStart()));
}
Decl *ASTImporter::GetAlreadyImportedOrNull(Decl *FromD) {
llvm::DenseMap<Decl *, Decl *>::iterator Pos = ImportedDecls.find(FromD);
if (Pos != ImportedDecls.end()) {
Decl *ToD = Pos->second;
ASTNodeImporter(*this).ImportDefinitionIfNeeded(FromD, ToD);
return ToD;
} else {
return nullptr;
}
}
Decl *ASTImporter::Import(Decl *FromD) {
if (!FromD)
return nullptr;
ASTNodeImporter Importer(*this);
// Check whether we've already imported this declaration.
llvm::DenseMap<Decl *, Decl *>::iterator Pos = ImportedDecls.find(FromD);
if (Pos != ImportedDecls.end()) {
Decl *ToD = Pos->second;
Importer.ImportDefinitionIfNeeded(FromD, ToD);
return ToD;
}
// Import the declaration.
Decl *ToD = Importer.Visit(FromD);
if (!ToD)
return nullptr;
// Record the imported declaration.
ImportedDecls[FromD] = ToD;
if (TagDecl *FromTag = dyn_cast<TagDecl>(FromD)) {
// Keep track of anonymous tags that have an associated typedef.
if (FromTag->getTypedefNameForAnonDecl())
AnonTagsWithPendingTypedefs.push_back(FromTag);
} else if (TypedefNameDecl *FromTypedef = dyn_cast<TypedefNameDecl>(FromD)) {
// When we've finished transforming a typedef, see whether it was the
// typedef for an anonymous tag.
for (SmallVectorImpl<TagDecl *>::iterator
FromTag = AnonTagsWithPendingTypedefs.begin(),
FromTagEnd = AnonTagsWithPendingTypedefs.end();
FromTag != FromTagEnd; ++FromTag) {
if ((*FromTag)->getTypedefNameForAnonDecl() == FromTypedef) {
if (TagDecl *ToTag = cast_or_null<TagDecl>(Import(*FromTag))) {
// We found the typedef for an anonymous tag; link them.
ToTag->setTypedefNameForAnonDecl(cast<TypedefNameDecl>(ToD));
AnonTagsWithPendingTypedefs.erase(FromTag);
break;
}
}
}
}
return ToD;
}
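// Illustrative note: the AnonTagsWithPendingTypedefs bookkeeping above
// exists for declarations such as
//
//   typedef struct { int x; } T;
//
// where the anonymous tag is only reachable through its typedef. The tag is
// imported first and parked; once the TypedefNameDecl arrives, the loop
// re-links the imported pair via setTypedefNameForAnonDecl.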
DeclContext *ASTImporter::ImportContext(DeclContext *FromDC) {
if (!FromDC)
return FromDC;
DeclContext *ToDC = cast_or_null<DeclContext>(Import(cast<Decl>(FromDC)));
if (!ToDC)
return nullptr;
// When we're using a record/enum/Objective-C class/protocol as a context, we
// need it to have a definition.
if (RecordDecl *ToRecord = dyn_cast<RecordDecl>(ToDC)) {
RecordDecl *FromRecord = cast<RecordDecl>(FromDC);
if (ToRecord->isCompleteDefinition()) {
// Do nothing.
} else if (FromRecord->isCompleteDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromRecord, ToRecord,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToRecord);
}
} else if (EnumDecl *ToEnum = dyn_cast<EnumDecl>(ToDC)) {
EnumDecl *FromEnum = cast<EnumDecl>(FromDC);
if (ToEnum->isCompleteDefinition()) {
// Do nothing.
} else if (FromEnum->isCompleteDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromEnum, ToEnum,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToEnum);
}
} else if (ObjCInterfaceDecl *ToClass = dyn_cast<ObjCInterfaceDecl>(ToDC)) {
ObjCInterfaceDecl *FromClass = cast<ObjCInterfaceDecl>(FromDC);
if (ToClass->getDefinition()) {
// Do nothing.
} else if (ObjCInterfaceDecl *FromDef = FromClass->getDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromDef, ToClass,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToClass);
}
} else if (ObjCProtocolDecl *ToProto = dyn_cast<ObjCProtocolDecl>(ToDC)) {
ObjCProtocolDecl *FromProto = cast<ObjCProtocolDecl>(FromDC);
if (ToProto->getDefinition()) {
// Do nothing.
} else if (ObjCProtocolDecl *FromDef = FromProto->getDefinition()) {
ASTNodeImporter(*this).ImportDefinition(FromDef, ToProto,
ASTNodeImporter::IDK_Basic);
} else {
CompleteDecl(ToProto);
}
}
return ToDC;
}
Expr *ASTImporter::Import(Expr *FromE) {
if (!FromE)
return nullptr;
return cast_or_null<Expr>(Import(cast<Stmt>(FromE)));
}
Stmt *ASTImporter::Import(Stmt *FromS) {
if (!FromS)
return nullptr;
// Check whether we've already imported this statement.
llvm::DenseMap<Stmt *, Stmt *>::iterator Pos = ImportedStmts.find(FromS);
if (Pos != ImportedStmts.end())
return Pos->second;
// Import the statement.
ASTNodeImporter Importer(*this);
Stmt *ToS = Importer.Visit(FromS);
if (!ToS)
return nullptr;
// Record the imported statement.
ImportedStmts[FromS] = ToS;
return ToS;
}
NestedNameSpecifier *ASTImporter::Import(NestedNameSpecifier *FromNNS) {
if (!FromNNS)
return nullptr;
NestedNameSpecifier *prefix = Import(FromNNS->getPrefix());
switch (FromNNS->getKind()) {
case NestedNameSpecifier::Identifier:
if (IdentifierInfo *II = Import(FromNNS->getAsIdentifier())) {
return NestedNameSpecifier::Create(ToContext, prefix, II);
}
return nullptr;
case NestedNameSpecifier::Namespace:
if (NamespaceDecl *NS =
cast_or_null<NamespaceDecl>(Import(FromNNS->getAsNamespace()))) {
return NestedNameSpecifier::Create(ToContext, prefix, NS);
}
return nullptr;
case NestedNameSpecifier::NamespaceAlias:
if (NamespaceAliasDecl *NSAD =
cast_or_null<NamespaceAliasDecl>(Import(FromNNS->getAsNamespaceAlias()))) {
return NestedNameSpecifier::Create(ToContext, prefix, NSAD);
}
return nullptr;
case NestedNameSpecifier::Global:
return NestedNameSpecifier::GlobalSpecifier(ToContext);
case NestedNameSpecifier::Super:
if (CXXRecordDecl *RD =
cast_or_null<CXXRecordDecl>(Import(FromNNS->getAsRecordDecl()))) {
return NestedNameSpecifier::SuperSpecifier(ToContext, RD);
}
return nullptr;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
QualType T = Import(QualType(FromNNS->getAsType(), 0u));
if (!T.isNull()) {
bool bTemplate = FromNNS->getKind() ==
NestedNameSpecifier::TypeSpecWithTemplate;
return NestedNameSpecifier::Create(ToContext, prefix,
bTemplate, T.getTypePtr());
}
}
return nullptr;
}
llvm_unreachable("Invalid nested name specifier kind");
}
NestedNameSpecifierLoc ASTImporter::Import(NestedNameSpecifierLoc FromNNS) {
// Mostly copied from the NestedNameSpecifier overload above.
SmallVector<NestedNameSpecifierLoc , 8> NestedNames;
NestedNameSpecifierLoc NNS = FromNNS;
// Push each of the nested-name-specifiers onto a stack so they can be
// rebuilt in reverse (outermost-first) order.
while (NNS) {
NestedNames.push_back(NNS);
NNS = NNS.getPrefix();
}
NestedNameSpecifierLocBuilder Builder;
while (!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier *Spec = Import(NNS.getNestedNameSpecifier());
if (!Spec)
return NestedNameSpecifierLoc();
NestedNameSpecifier::SpecifierKind Kind = Spec->getKind();
switch (Kind) {
case NestedNameSpecifier::Identifier:
Builder.Extend(getToContext(),
Spec->getAsIdentifier(),
Import(NNS.getLocalBeginLoc()),
Import(NNS.getLocalEndLoc()));
break;
case NestedNameSpecifier::Namespace:
Builder.Extend(getToContext(),
Spec->getAsNamespace(),
Import(NNS.getLocalBeginLoc()),
Import(NNS.getLocalEndLoc()));
break;
case NestedNameSpecifier::NamespaceAlias:
Builder.Extend(getToContext(),
Spec->getAsNamespaceAlias(),
Import(NNS.getLocalBeginLoc()),
Import(NNS.getLocalEndLoc()));
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
TypeSourceInfo *TSI = getToContext().getTrivialTypeSourceInfo(
QualType(Spec->getAsType(), 0));
Builder.Extend(getToContext(),
Import(NNS.getLocalBeginLoc()),
TSI->getTypeLoc(),
Import(NNS.getLocalEndLoc()));
break;
}
case NestedNameSpecifier::Global:
Builder.MakeGlobal(getToContext(), Import(NNS.getLocalBeginLoc()));
break;
case NestedNameSpecifier::Super: {
SourceRange ToRange = Import(NNS.getSourceRange());
Builder.MakeSuper(getToContext(),
Spec->getAsRecordDecl(),
ToRange.getBegin(),
ToRange.getEnd());
}
}
}
return Builder.getWithLocInContext(getToContext());
}
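// Illustrative note: for a qualifier such as A::B::C::, the first loop pushes
// the full specifier and then each shorter prefix, so pop_back_val() replays
// them outermost-first (A::, then A::B::, then A::B::C::) while the builder
// imports each component's begin/end locations.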
TemplateName ASTImporter::Import(TemplateName From) {
switch (From.getKind()) {
case TemplateName::Template:
if (TemplateDecl *ToTemplate
= cast_or_null<TemplateDecl>(Import(From.getAsTemplateDecl())))
return TemplateName(ToTemplate);
return TemplateName();
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *FromStorage = From.getAsOverloadedTemplate();
UnresolvedSet<2> ToTemplates;
for (OverloadedTemplateStorage::iterator I = FromStorage->begin(),
E = FromStorage->end();
I != E; ++I) {
if (NamedDecl *To = cast_or_null<NamedDecl>(Import(*I)))
ToTemplates.addDecl(To);
else
return TemplateName();
}
return ToContext.getOverloadedTemplateName(ToTemplates.begin(),
ToTemplates.end());
}
case TemplateName::QualifiedTemplate: {
QualifiedTemplateName *QTN = From.getAsQualifiedTemplateName();
NestedNameSpecifier *Qualifier = Import(QTN->getQualifier());
if (!Qualifier)
return TemplateName();
if (TemplateDecl *ToTemplate
= cast_or_null<TemplateDecl>(Import(From.getAsTemplateDecl())))
return ToContext.getQualifiedTemplateName(Qualifier,
QTN->hasTemplateKeyword(),
ToTemplate);
return TemplateName();
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = From.getAsDependentTemplateName();
NestedNameSpecifier *Qualifier = Import(DTN->getQualifier());
if (!Qualifier)
return TemplateName();
if (DTN->isIdentifier()) {
return ToContext.getDependentTemplateName(Qualifier,
Import(DTN->getIdentifier()));
}
return ToContext.getDependentTemplateName(Qualifier, DTN->getOperator());
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= From.getAsSubstTemplateTemplateParm();
TemplateTemplateParmDecl *param
= cast_or_null<TemplateTemplateParmDecl>(Import(subst->getParameter()));
if (!param)
return TemplateName();
TemplateName replacement = Import(subst->getReplacement());
if (replacement.isNull()) return TemplateName();
return ToContext.getSubstTemplateTemplateParm(param, replacement);
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *SubstPack
= From.getAsSubstTemplateTemplateParmPack();
TemplateTemplateParmDecl *Param
= cast_or_null<TemplateTemplateParmDecl>(
Import(SubstPack->getParameterPack()));
if (!Param)
return TemplateName();
ASTNodeImporter Importer(*this);
TemplateArgument ArgPack
= Importer.ImportTemplateArgument(SubstPack->getArgumentPack());
if (ArgPack.isNull())
return TemplateName();
return ToContext.getSubstTemplateTemplateParmPack(Param, ArgPack);
}
}
llvm_unreachable("Invalid template name kind");
}
SourceLocation ASTImporter::Import(SourceLocation FromLoc) {
if (FromLoc.isInvalid())
return SourceLocation();
SourceManager &FromSM = FromContext.getSourceManager();
// For now, map everything down to its file location, so that we
// don't have to import macro expansions.
// FIXME: Import macro expansions!
FromLoc = FromSM.getFileLoc(FromLoc);
std::pair<FileID, unsigned> Decomposed = FromSM.getDecomposedLoc(FromLoc);
SourceManager &ToSM = ToContext.getSourceManager();
FileID ToFileID = Import(Decomposed.first);
if (ToFileID.isInvalid())
return SourceLocation();
SourceLocation ret = ToSM.getLocForStartOfFile(ToFileID)
.getLocWithOffset(Decomposed.second);
return ret;
}
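// Illustrative note: because the location is first flattened with
// getFileLoc(), a location inside a macro expansion imports as the expansion
// point in the surrounding file; the spelling/expansion structure itself is
// dropped, as the FIXME above records.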
SourceRange ASTImporter::Import(SourceRange FromRange) {
return SourceRange(Import(FromRange.getBegin()), Import(FromRange.getEnd()));
}
FileID ASTImporter::Import(FileID FromID) {
llvm::DenseMap<FileID, FileID>::iterator Pos
= ImportedFileIDs.find(FromID);
if (Pos != ImportedFileIDs.end())
return Pos->second;
SourceManager &FromSM = FromContext.getSourceManager();
SourceManager &ToSM = ToContext.getSourceManager();
const SrcMgr::SLocEntry &FromSLoc = FromSM.getSLocEntry(FromID);
assert(FromSLoc.isFile() && "Cannot handle macro expansions yet");
// Include location of this file.
SourceLocation ToIncludeLoc = Import(FromSLoc.getFile().getIncludeLoc());
// Map the FileID into the "to" source manager.
FileID ToID;
const SrcMgr::ContentCache *Cache = FromSLoc.getFile().getContentCache();
if (Cache->OrigEntry && Cache->OrigEntry->getDir()) {
// FIXME: We probably want to use getVirtualFile(), so we don't hit the
// disk again
// FIXME: We definitely want to re-use the existing MemoryBuffer, rather
// than mmap the files several times.
const FileEntry *Entry = ToFileManager.getFile(Cache->OrigEntry->getName());
if (!Entry)
return FileID();
ToID = ToSM.createFileID(Entry, ToIncludeLoc,
FromSLoc.getFile().getFileCharacteristic());
} else {
// FIXME: We want to re-use the existing MemoryBuffer!
const llvm::MemoryBuffer *FromBuf =
Cache->getBuffer(FromContext.getDiagnostics(), FromSM);
std::unique_ptr<llvm::MemoryBuffer> ToBuf
= llvm::MemoryBuffer::getMemBufferCopy(FromBuf->getBuffer(),
FromBuf->getBufferIdentifier());
ToID = ToSM.createFileID(std::move(ToBuf),
FromSLoc.getFile().getFileCharacteristic());
}
ImportedFileIDs[FromID] = ToID;
return ToID;
}
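// Illustrative note: the two branches above trade fidelity for safety. If the
// original file is still present on disk it is looked up again through the
// "to" FileManager; otherwise the buffer contents are copied wholesale into
// the "to" SourceManager, which is what the FIXMEs about reusing the existing
// MemoryBuffer refer to.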
CXXCtorInitializer *ASTImporter::Import(CXXCtorInitializer *From) {
Expr *ToExpr = Import(From->getInit());
if (!ToExpr && From->getInit())
return nullptr;
if (From->isBaseInitializer()) {
TypeSourceInfo *ToTInfo = Import(From->getTypeSourceInfo());
if (!ToTInfo && From->getTypeSourceInfo())
return nullptr;
return new (ToContext) CXXCtorInitializer(
ToContext, ToTInfo, From->isBaseVirtual(), Import(From->getLParenLoc()),
ToExpr, Import(From->getRParenLoc()),
From->isPackExpansion() ? Import(From->getEllipsisLoc())
: SourceLocation());
} else if (From->isMemberInitializer()) {
FieldDecl *ToField =
llvm::cast_or_null<FieldDecl>(Import(From->getMember()));
if (!ToField && From->getMember())
return nullptr;
return new (ToContext) CXXCtorInitializer(
ToContext, ToField, Import(From->getMemberLocation()),
Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()));
} else if (From->isIndirectMemberInitializer()) {
IndirectFieldDecl *ToIField = llvm::cast_or_null<IndirectFieldDecl>(
Import(From->getIndirectMember()));
if (!ToIField && From->getIndirectMember())
return nullptr;
return new (ToContext) CXXCtorInitializer(
ToContext, ToIField, Import(From->getMemberLocation()),
Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()));
} else if (From->isDelegatingInitializer()) {
TypeSourceInfo *ToTInfo = Import(From->getTypeSourceInfo());
if (!ToTInfo && From->getTypeSourceInfo())
return nullptr;
return new (ToContext)
CXXCtorInitializer(ToContext, ToTInfo, Import(From->getLParenLoc()),
ToExpr, Import(From->getRParenLoc()));
} else {
return nullptr;
}
}
CXXBaseSpecifier *ASTImporter::Import(const CXXBaseSpecifier *BaseSpec) {
auto Pos = ImportedCXXBaseSpecifiers.find(BaseSpec);
if (Pos != ImportedCXXBaseSpecifiers.end())
return Pos->second;
CXXBaseSpecifier *Imported = new (ToContext) CXXBaseSpecifier(
Import(BaseSpec->getSourceRange()),
BaseSpec->isVirtual(), BaseSpec->isBaseOfClass(),
BaseSpec->getAccessSpecifierAsWritten(),
Import(BaseSpec->getTypeSourceInfo()),
Import(BaseSpec->getEllipsisLoc()));
ImportedCXXBaseSpecifiers[BaseSpec] = Imported;
return Imported;
}
void ASTImporter::ImportDefinition(Decl *From) {
Decl *To = Import(From);
if (!To)
return;
if (DeclContext *FromDC = dyn_cast<DeclContext>(From)) {
ASTNodeImporter Importer(*this);
if (RecordDecl *ToRecord = dyn_cast<RecordDecl>(To)) {
if (!ToRecord->getDefinition()) {
Importer.ImportDefinition(cast<RecordDecl>(FromDC), ToRecord,
ASTNodeImporter::IDK_Everything);
return;
}
}
if (EnumDecl *ToEnum = dyn_cast<EnumDecl>(To)) {
if (!ToEnum->getDefinition()) {
Importer.ImportDefinition(cast<EnumDecl>(FromDC), ToEnum,
ASTNodeImporter::IDK_Everything);
return;
}
}
if (ObjCInterfaceDecl *ToIFace = dyn_cast<ObjCInterfaceDecl>(To)) {
if (!ToIFace->getDefinition()) {
Importer.ImportDefinition(cast<ObjCInterfaceDecl>(FromDC), ToIFace,
ASTNodeImporter::IDK_Everything);
return;
}
}
if (ObjCProtocolDecl *ToProto = dyn_cast<ObjCProtocolDecl>(To)) {
if (!ToProto->getDefinition()) {
Importer.ImportDefinition(cast<ObjCProtocolDecl>(FromDC), ToProto,
ASTNodeImporter::IDK_Everything);
return;
}
}
Importer.ImportDeclContext(FromDC, true);
}
}
DeclarationName ASTImporter::Import(DeclarationName FromName) {
if (!FromName)
return DeclarationName();
switch (FromName.getNameKind()) {
case DeclarationName::Identifier:
return Import(FromName.getAsIdentifierInfo());
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
return Import(FromName.getObjCSelector());
case DeclarationName::CXXConstructorName: {
QualType T = Import(FromName.getCXXNameType());
if (T.isNull())
return DeclarationName();
return ToContext.DeclarationNames.getCXXConstructorName(
ToContext.getCanonicalType(T));
}
case DeclarationName::CXXDestructorName: {
QualType T = Import(FromName.getCXXNameType());
if (T.isNull())
return DeclarationName();
return ToContext.DeclarationNames.getCXXDestructorName(
ToContext.getCanonicalType(T));
}
case DeclarationName::CXXDeductionGuideName: {
TemplateDecl *Template = cast_or_null<TemplateDecl>(
Import(FromName.getCXXDeductionGuideTemplate()));
if (!Template)
return DeclarationName();
return ToContext.DeclarationNames.getCXXDeductionGuideName(Template);
}
case DeclarationName::CXXConversionFunctionName: {
QualType T = Import(FromName.getCXXNameType());
if (T.isNull())
return DeclarationName();
return ToContext.DeclarationNames.getCXXConversionFunctionName(
ToContext.getCanonicalType(T));
}
case DeclarationName::CXXOperatorName:
return ToContext.DeclarationNames.getCXXOperatorName(
FromName.getCXXOverloadedOperator());
case DeclarationName::CXXLiteralOperatorName:
return ToContext.DeclarationNames.getCXXLiteralOperatorName(
Import(FromName.getCXXLiteralIdentifier()));
case DeclarationName::CXXUsingDirective:
// FIXME: STATICS!
return DeclarationName::getUsingDirectiveName();
}
llvm_unreachable("Invalid DeclarationName Kind!");
}
IdentifierInfo *ASTImporter::Import(const IdentifierInfo *FromId) {
if (!FromId)
return nullptr;
IdentifierInfo *ToId = &ToContext.Idents.get(FromId->getName());
if (!ToId->getBuiltinID() && FromId->getBuiltinID())
ToId->setBuiltinID(FromId->getBuiltinID());
return ToId;
}
Selector ASTImporter::Import(Selector FromSel) {
if (FromSel.isNull())
return Selector();
SmallVector<IdentifierInfo *, 4> Idents;
Idents.push_back(Import(FromSel.getIdentifierInfoForSlot(0)));
for (unsigned I = 1, N = FromSel.getNumArgs(); I < N; ++I)
Idents.push_back(Import(FromSel.getIdentifierInfoForSlot(I)));
return ToContext.Selectors.getSelector(FromSel.getNumArgs(), Idents.data());
}
DeclarationName ASTImporter::HandleNameConflict(DeclarationName Name,
DeclContext *DC,
unsigned IDNS,
NamedDecl **Decls,
unsigned NumDecls) {
return Name;
}
DiagnosticBuilder ASTImporter::ToDiag(SourceLocation Loc, unsigned DiagID) {
if (LastDiagFromFrom)
ToContext.getDiagnostics().notePriorDiagnosticFrom(
FromContext.getDiagnostics());
LastDiagFromFrom = false;
return ToContext.getDiagnostics().Report(Loc, DiagID);
}
DiagnosticBuilder ASTImporter::FromDiag(SourceLocation Loc, unsigned DiagID) {
if (!LastDiagFromFrom)
FromContext.getDiagnostics().notePriorDiagnosticFrom(
ToContext.getDiagnostics());
LastDiagFromFrom = true;
return FromContext.getDiagnostics().Report(Loc, DiagID);
}
void ASTImporter::CompleteDecl(Decl *D) {
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D)) {
if (!ID->getDefinition())
ID->startDefinition();
} else if (ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D)) {
if (!PD->getDefinition())
PD->startDefinition();
} else if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
if (!TD->getDefinition() && !TD->isBeingDefined()) {
TD->startDefinition();
TD->setCompleteDefinition(true);
}
} else {
assert(0 && "CompleteDecl called on a Decl that can't be completed");
}
}
Decl *ASTImporter::Imported(Decl *From, Decl *To) {
if (From->hasAttrs()) {
for (Attr *FromAttr : From->getAttrs())
To->addAttr(FromAttr->clone(To->getASTContext()));
}
if (From->isUsed()) {
To->setIsUsed();
}
if (From->isImplicit()) {
To->setImplicit();
}
ImportedDecls[From] = To;
return To;
}
bool ASTImporter::IsStructurallyEquivalent(QualType From, QualType To,
bool Complain) {
llvm::DenseMap<const Type *, const Type *>::iterator Pos
= ImportedTypes.find(From.getTypePtr());
if (Pos != ImportedTypes.end() && ToContext.hasSameType(Import(From), To))
return true;
StructuralEquivalenceContext Ctx(FromContext, ToContext, NonEquivalentDecls,
false, Complain);
return Ctx.IsStructurallyEquivalent(From, To);
}
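// Usage sketch (hypothetical names, not part of this commit): clients such as
// LLDB's expression evaluator drive the importer roughly as
//
//   ASTImporter Importer(ToCtx, ToFileMgr, FromCtx, FromFileMgr,
//                        /*MinimalImport=*/false);
//   if (Decl *ToD = Importer.Import(FromDecl))
//     ; // ToD now lives in ToCtx; later imports reuse the caches above.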
Index: head/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp (revision 322855)
@@ -1,2566 +1,2592 @@
//===--- DeclCXX.cpp - C++ Declaration AST Node Implementation ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the C++ related Decl classes.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ODRHash.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/IdentifierTable.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace clang;
//===----------------------------------------------------------------------===//
// Decl Allocation/Deallocation Method Implementations
//===----------------------------------------------------------------------===//
void AccessSpecDecl::anchor() { }
AccessSpecDecl *AccessSpecDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) AccessSpecDecl(EmptyShell());
}
void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const {
ExternalASTSource *Source = C.getExternalSource();
assert(Impl.Decls.isLazy() && "getFromExternalSource for non-lazy set");
assert(Source && "getFromExternalSource with no external source");
for (ASTUnresolvedSet::iterator I = Impl.begin(); I != Impl.end(); ++I)
I.setDecl(cast<NamedDecl>(Source->GetExternalDecl(
reinterpret_cast<uintptr_t>(I.getDecl()) >> 2)));
Impl.Decls.setLazy(false);
}
CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
: UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0),
Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false),
Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true),
HasPrivateFields(false), HasProtectedFields(false),
HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false),
HasOnlyCMembers(true), HasInClassInitializer(false),
HasUninitializedReferenceMember(false), HasUninitializedFields(false),
HasInheritedConstructor(false), HasInheritedAssignment(false),
+ NeedOverloadResolutionForCopyConstructor(false),
NeedOverloadResolutionForMoveConstructor(false),
NeedOverloadResolutionForMoveAssignment(false),
NeedOverloadResolutionForDestructor(false),
+ DefaultedCopyConstructorIsDeleted(false),
DefaultedMoveConstructorIsDeleted(false),
DefaultedMoveAssignmentIsDeleted(false),
DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All),
DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true),
HasConstexprNonCopyMoveConstructor(false),
HasDefaultedDefaultConstructor(false),
+ CanPassInRegisters(true),
DefaultedDefaultConstructorIsConstexpr(true),
HasConstexprDefaultConstructor(false),
HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false),
UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0),
ImplicitCopyConstructorCanHaveConstParamForVBase(true),
ImplicitCopyConstructorCanHaveConstParamForNonVBase(true),
ImplicitCopyAssignmentHasConstParam(true),
HasDeclaredCopyConstructorWithConstParam(false),
HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false),
IsParsingBaseSpecifiers(false), HasODRHash(false), ODRHash(0),
NumBases(0), NumVBases(0), Bases(), VBases(), Definition(D),
FirstFriend() {}
CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const {
return Bases.get(Definition->getASTContext().getExternalSource());
}
CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getVBasesSlowCase() const {
return VBases.get(Definition->getASTContext().getExternalSource());
}
CXXRecordDecl::CXXRecordDecl(Kind K, TagKind TK, const ASTContext &C,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
CXXRecordDecl *PrevDecl)
: RecordDecl(K, TK, C, DC, StartLoc, IdLoc, Id, PrevDecl),
DefinitionData(PrevDecl ? PrevDecl->DefinitionData
: nullptr),
TemplateOrInstantiation() {}
CXXRecordDecl *CXXRecordDecl::Create(const ASTContext &C, TagKind TK,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
CXXRecordDecl* PrevDecl,
bool DelayTypeCreation) {
CXXRecordDecl *R = new (C, DC) CXXRecordDecl(CXXRecord, TK, C, DC, StartLoc,
IdLoc, Id, PrevDecl);
R->MayHaveOutOfDateDef = C.getLangOpts().Modules;
// FIXME: DelayTypeCreation seems like such a hack
if (!DelayTypeCreation)
C.getTypeDeclType(R, PrevDecl);
return R;
}
CXXRecordDecl *
CXXRecordDecl::CreateLambda(const ASTContext &C, DeclContext *DC,
TypeSourceInfo *Info, SourceLocation Loc,
bool Dependent, bool IsGeneric,
LambdaCaptureDefault CaptureDefault) {
CXXRecordDecl *R =
new (C, DC) CXXRecordDecl(CXXRecord, TTK_Class, C, DC, Loc, Loc,
nullptr, nullptr);
R->IsBeingDefined = true;
R->DefinitionData =
new (C) struct LambdaDefinitionData(R, Info, Dependent, IsGeneric,
CaptureDefault);
R->MayHaveOutOfDateDef = false;
R->setImplicit(true);
C.getTypeDeclType(R, /*PrevDecl=*/nullptr);
return R;
}
CXXRecordDecl *
CXXRecordDecl::CreateDeserialized(const ASTContext &C, unsigned ID) {
CXXRecordDecl *R = new (C, ID) CXXRecordDecl(
CXXRecord, TTK_Struct, C, nullptr, SourceLocation(), SourceLocation(),
nullptr, nullptr);
R->MayHaveOutOfDateDef = false;
return R;
}
void
CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
unsigned NumBases) {
ASTContext &C = getASTContext();
if (!data().Bases.isOffset() && data().NumBases > 0)
C.Deallocate(data().getBases());
if (NumBases) {
if (!C.getLangOpts().CPlusPlus1z) {
// C++ [dcl.init.aggr]p1:
// An aggregate is [...] a class with [...] no base classes [...].
data().Aggregate = false;
}
// C++ [class]p4:
// A POD-struct is an aggregate class...
data().PlainOldData = false;
}
// The set of seen virtual base types.
llvm::SmallPtrSet<CanQualType, 8> SeenVBaseTypes;
// The virtual bases of this class.
SmallVector<const CXXBaseSpecifier *, 8> VBases;
data().Bases = new(C) CXXBaseSpecifier [NumBases];
data().NumBases = NumBases;
for (unsigned i = 0; i < NumBases; ++i) {
data().getBases()[i] = *Bases[i];
// Keep track of inherited vbases for this base class.
const CXXBaseSpecifier *Base = Bases[i];
QualType BaseType = Base->getType();
// Skip dependent types; we can't do any checking on them now.
if (BaseType->isDependentType())
continue;
CXXRecordDecl *BaseClassDecl
= cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl());
if (!BaseClassDecl->isEmpty()) {
if (!data().Empty) {
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- either has no non-static data members in the most derived
// class and at most one base class with non-static data members,
// or has no base classes with non-static data members, and
// If this is the second non-empty base, then neither of these two
// clauses can be true.
data().IsStandardLayout = false;
}
// C++14 [meta.unary.prop]p4:
// T is a class type [...] with [...] no base class B for which
// is_empty<B>::value is false.
data().Empty = false;
data().HasNoNonEmptyBases = false;
}
// C++1z [dcl.init.agg]p1:
// An aggregate is a class with [...] no private or protected base classes
if (Base->getAccessSpecifier() != AS_public)
data().Aggregate = false;
// C++ [class.virtual]p1:
// A class that declares or inherits a virtual function is called a
// polymorphic class.
if (BaseClassDecl->isPolymorphic())
data().Polymorphic = true;
// C++0x [class]p7:
// A standard-layout class is a class that: [...]
// -- has no non-standard-layout base classes
if (!BaseClassDecl->isStandardLayout())
data().IsStandardLayout = false;
// Record if this base is the first non-literal field or base.
if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C))
data().HasNonLiteralTypeFieldsOrBases = true;
// Now go through all virtual bases of this base and add them.
for (const auto &VBase : BaseClassDecl->vbases()) {
// Add this base if it's not already in the list.
if (SeenVBaseTypes.insert(C.getCanonicalType(VBase.getType())).second) {
VBases.push_back(&VBase);
// C++11 [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each [...] virtual base class B of X
// has a copy constructor whose first parameter is of type
// 'const B&' or 'const volatile B&' [...]
if (CXXRecordDecl *VBaseDecl = VBase.getType()->getAsCXXRecordDecl())
if (!VBaseDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForVBase = false;
// C++1z [dcl.init.agg]p1:
// An aggregate is a class with [...] no virtual base classes
data().Aggregate = false;
}
}
if (Base->isVirtual()) {
// Add this base if it's not already in the list.
if (SeenVBaseTypes.insert(C.getCanonicalType(BaseType)).second)
VBases.push_back(Base);
// C++14 [meta.unary.prop] is_empty:
// T is a class type, but not a union type, with ... no virtual base
// classes
data().Empty = false;
// C++1z [dcl.init.agg]p1:
// An aggregate is a class with [...] no virtual base classes
data().Aggregate = false;
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor, copy/move constructor, or copy/move assignment
// operator for a class X] is trivial [...] if:
// -- class X has [...] no virtual base classes
data().HasTrivialSpecialMembers &= SMF_Destructor;
// C++0x [class]p7:
// A standard-layout class is a class that: [...]
// -- has [...] no virtual base classes
data().IsStandardLayout = false;
// C++11 [dcl.constexpr]p4:
// In the definition of a constexpr constructor [...]
// -- the class shall not have any virtual base classes
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++1z [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each potentially constructed subobject
// has a copy constructor whose first parameter is of type
// 'const B&' or 'const volatile B&' [...]
if (!BaseClassDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForVBase = false;
} else {
// C++ [class.ctor]p5:
// A default constructor is trivial [...] if:
// -- all the direct base classes of its class have trivial default
// constructors.
if (!BaseClassDecl->hasTrivialDefaultConstructor())
data().HasTrivialSpecialMembers &= ~SMF_DefaultConstructor;
// C++0x [class.copy]p13:
// A copy/move constructor for class X is trivial if [...]
// [...]
// -- the constructor selected to copy/move each direct base class
// subobject is trivial, and
if (!BaseClassDecl->hasTrivialCopyConstructor())
data().HasTrivialSpecialMembers &= ~SMF_CopyConstructor;
// If the base class doesn't have a simple move constructor, we'll eagerly
// declare it and perform overload resolution to determine which function
// it actually calls. If it does have a simple move constructor, this
// check is correct.
if (!BaseClassDecl->hasTrivialMoveConstructor())
data().HasTrivialSpecialMembers &= ~SMF_MoveConstructor;
// C++0x [class.copy]p27:
// A copy/move assignment operator for class X is trivial if [...]
// [...]
// -- the assignment operator selected to copy/move each direct base
// class subobject is trivial, and
if (!BaseClassDecl->hasTrivialCopyAssignment())
data().HasTrivialSpecialMembers &= ~SMF_CopyAssignment;
// If the base class doesn't have a simple move assignment, we'll eagerly
// declare it and perform overload resolution to determine which function
// it actually calls. If it does have a simple move assignment, this
// check is correct.
if (!BaseClassDecl->hasTrivialMoveAssignment())
data().HasTrivialSpecialMembers &= ~SMF_MoveAssignment;
// C++11 [class.ctor]p6:
// If that user-written default constructor would satisfy the
// requirements of a constexpr constructor, the implicitly-defined
// default constructor is constexpr.
if (!BaseClassDecl->hasConstexprDefaultConstructor())
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++1z [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each potentially constructed subobject
// has a copy constructor whose first parameter is of type
// 'const B&' or 'const volatile B&' [...]
if (!BaseClassDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForNonVBase = false;
}
// C++ [class.ctor]p3:
// A destructor is trivial if all the direct base classes of its class
// have trivial destructors.
if (!BaseClassDecl->hasTrivialDestructor())
data().HasTrivialSpecialMembers &= ~SMF_Destructor;
if (!BaseClassDecl->hasIrrelevantDestructor())
data().HasIrrelevantDestructor = false;
// C++11 [class.copy]p18:
// The implicitly-declared copy assignment operator for a class X will
// have the form 'X& X::operator=(const X&)' if each direct base class B
// of X has a copy assignment operator whose parameter is of type 'const
// B&', 'const volatile B&', or 'B' [...]
if (!BaseClassDecl->hasCopyAssignmentWithConstParam())
data().ImplicitCopyAssignmentHasConstParam = false;
// A class has an Objective-C object member if... or any of its bases
// has an Objective-C object member.
if (BaseClassDecl->hasObjectMember())
setHasObjectMember(true);
if (BaseClassDecl->hasVolatileMember())
setHasVolatileMember(true);
// Keep track of the presence of mutable fields.
- if (BaseClassDecl->hasMutableFields())
+ if (BaseClassDecl->hasMutableFields()) {
data().HasMutableFields = true;
+ data().NeedOverloadResolutionForCopyConstructor = true;
+ }
if (BaseClassDecl->hasUninitializedReferenceMember())
data().HasUninitializedReferenceMember = true;
if (!BaseClassDecl->allowConstDefaultInit())
data().HasUninitializedFields = true;
addedClassSubobject(BaseClassDecl);
}
if (VBases.empty()) {
data().IsParsingBaseSpecifiers = false;
return;
}
// Create base specifier for any direct or indirect virtual bases.
data().VBases = new (C) CXXBaseSpecifier[VBases.size()];
data().NumVBases = VBases.size();
for (int I = 0, E = VBases.size(); I != E; ++I) {
QualType Type = VBases[I]->getType();
if (!Type->isDependentType())
addedClassSubobject(Type->getAsCXXRecordDecl());
data().getVBases()[I] = *VBases[I];
}
data().IsParsingBaseSpecifiers = false;
}
unsigned CXXRecordDecl::getODRHash() const {
assert(hasDefinition() && "ODRHash only for records with definitions");
// Previously calculated hash is stored in DefinitionData.
if (DefinitionData->HasODRHash)
return DefinitionData->ODRHash;
// Only calculate hash on first call of getODRHash per record.
ODRHash Hash;
Hash.AddCXXRecordDecl(getDefinition());
DefinitionData->HasODRHash = true;
DefinitionData->ODRHash = Hash.CalculateHash();
return DefinitionData->ODRHash;
}
void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) {
// C++11 [class.copy]p11:
// A defaulted copy/move constructor for a class X is defined as
// deleted if X has:
// -- a direct or virtual base class B that cannot be copied/moved [...]
// -- a non-static data member of class type M (or array thereof)
// that cannot be copied or moved [...]
+ if (!Subobj->hasSimpleCopyConstructor())
+ data().NeedOverloadResolutionForCopyConstructor = true;
if (!Subobj->hasSimpleMoveConstructor())
data().NeedOverloadResolutionForMoveConstructor = true;
// C++11 [class.copy]p23:
// A defaulted copy/move assignment operator for a class X is defined as
// deleted if X has:
// -- a direct or virtual base class B that cannot be copied/moved [...]
// -- a non-static data member of class type M (or array thereof)
// that cannot be copied or moved [...]
if (!Subobj->hasSimpleMoveAssignment())
data().NeedOverloadResolutionForMoveAssignment = true;
// C++11 [class.ctor]p5, C++11 [class.copy]p11, C++11 [class.dtor]p5:
// A defaulted [ctor or dtor] for a class X is defined as
// deleted if X has:
// -- any direct or virtual base class [...] has a type with a destructor
// that is deleted or inaccessible from the defaulted [ctor or dtor].
// -- any non-static data member has a type with a destructor
// that is deleted or inaccessible from the defaulted [ctor or dtor].
if (!Subobj->hasSimpleDestructor()) {
+ data().NeedOverloadResolutionForCopyConstructor = true;
data().NeedOverloadResolutionForMoveConstructor = true;
data().NeedOverloadResolutionForDestructor = true;
}
}
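// Illustrative note (example types are hypothetical): the newly added
// copy-constructor flag mirrors the existing move logic. Given
//
//   struct M { M(M&); };  // copy constructor takes a non-const reference
//   struct X { M m; };
//
// X's defaulted copy constructor cannot be assumed to have the usual
// 'const X&' form, so NeedOverloadResolutionForCopyConstructor is set and
// overload resolution must determine what each subobject copy actually calls.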
bool CXXRecordDecl::hasAnyDependentBases() const {
if (!isDependentContext())
return false;
return !forallBases([](const CXXRecordDecl *) { return true; });
}
bool CXXRecordDecl::isTriviallyCopyable() const {
// C++0x [class]p5:
// A trivially copyable class is a class that:
// -- has no non-trivial copy constructors,
if (hasNonTrivialCopyConstructor()) return false;
// -- has no non-trivial move constructors,
if (hasNonTrivialMoveConstructor()) return false;
// -- has no non-trivial copy assignment operators,
if (hasNonTrivialCopyAssignment()) return false;
// -- has no non-trivial move assignment operators, and
if (hasNonTrivialMoveAssignment()) return false;
// -- has a trivial destructor.
if (!hasTrivialDestructor()) return false;
return true;
}
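// Illustrative examples (not part of the upstream file) for the predicate
// above:
//
//   struct A { int x; };        // trivially copyable
//   struct B { B(const B&); };  // not: user-declared copy constructor
//   struct C { ~C() {} };       // not: non-trivial destructor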
void CXXRecordDecl::markedVirtualFunctionPure() {
// C++ [class.abstract]p2:
// A class is abstract if it has at least one pure virtual function.
data().Abstract = true;
}
void CXXRecordDecl::addedMember(Decl *D) {
if (!D->isImplicit() &&
!isa<FieldDecl>(D) &&
!isa<IndirectFieldDecl>(D) &&
(!isa<TagDecl>(D) || cast<TagDecl>(D)->getTagKind() == TTK_Class ||
cast<TagDecl>(D)->getTagKind() == TTK_Interface))
data().HasOnlyCMembers = false;
// Ignore friends and invalid declarations.
if (D->getFriendObjectKind() || D->isInvalidDecl())
return;
FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(D);
if (FunTmpl)
D = FunTmpl->getTemplatedDecl();
// FIXME: Pass NamedDecl* to addedMember?
Decl *DUnderlying = D;
if (auto *ND = dyn_cast<NamedDecl>(DUnderlying)) {
DUnderlying = ND->getUnderlyingDecl();
if (FunctionTemplateDecl *UnderlyingFunTmpl =
dyn_cast<FunctionTemplateDecl>(DUnderlying))
DUnderlying = UnderlyingFunTmpl->getTemplatedDecl();
}
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isVirtual()) {
// C++ [dcl.init.aggr]p1:
// An aggregate is an array or a class with [...] no virtual functions.
data().Aggregate = false;
// C++ [class]p4:
// A POD-struct is an aggregate class...
data().PlainOldData = false;
// C++14 [meta.unary.prop]p4:
// T is a class type [...] with [...] no virtual member functions...
data().Empty = false;
// C++ [class.virtual]p1:
// A class that declares or inherits a virtual function is called a
// polymorphic class.
data().Polymorphic = true;
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor, copy/move constructor, or copy/move
// assignment operator for a class X] is trivial [...] if:
// -- class X has no virtual functions [...]
data().HasTrivialSpecialMembers &= SMF_Destructor;
// C++0x [class]p7:
// A standard-layout class is a class that: [...]
// -- has no virtual functions
data().IsStandardLayout = false;
}
}
// Notify the listener if an implicit member was added after the definition
// was completed.
if (!isBeingDefined() && D->isImplicit())
if (ASTMutationListener *L = getASTMutationListener())
L->AddedCXXImplicitMember(data().Definition, D);
// The kind of special member this declaration is, if any.
unsigned SMKind = 0;
// Handle constructors.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
if (!Constructor->isImplicit()) {
// Note that we have a user-declared constructor.
data().UserDeclaredConstructor = true;
// C++ [class]p4:
// A POD-struct is an aggregate class [...]
// Since the POD bit is meant to be C++03 POD-ness, clear it even if the
// type is technically an aggregate in C++0x since it wouldn't be in 03.
data().PlainOldData = false;
}
if (Constructor->isDefaultConstructor()) {
SMKind |= SMF_DefaultConstructor;
if (Constructor->isUserProvided())
data().UserProvidedDefaultConstructor = true;
if (Constructor->isConstexpr())
data().HasConstexprDefaultConstructor = true;
if (Constructor->isDefaulted())
data().HasDefaultedDefaultConstructor = true;
}
if (!FunTmpl) {
unsigned Quals;
if (Constructor->isCopyConstructor(Quals)) {
SMKind |= SMF_CopyConstructor;
if (Quals & Qualifiers::Const)
data().HasDeclaredCopyConstructorWithConstParam = true;
} else if (Constructor->isMoveConstructor())
SMKind |= SMF_MoveConstructor;
}
// C++11 [dcl.init.aggr]p1: DR1518
// An aggregate is an array or a class with no user-provided, explicit, or
// inherited constructors
if (Constructor->isUserProvided() || Constructor->isExplicit())
data().Aggregate = false;
}
// Handle constructors, including those inherited from base classes.
if (CXXConstructorDecl *Constructor =
dyn_cast<CXXConstructorDecl>(DUnderlying)) {
// Record if we see any constexpr constructors which are neither copy
// nor move constructors.
// C++1z [basic.types]p10:
// [...] has at least one constexpr constructor or constructor template
// (possibly inherited from a base class) that is not a copy or move
// constructor [...]
if (Constructor->isConstexpr() && !Constructor->isCopyOrMoveConstructor())
data().HasConstexprNonCopyMoveConstructor = true;
}
// Handle destructors.
if (CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(D)) {
SMKind |= SMF_Destructor;
if (DD->isUserProvided())
data().HasIrrelevantDestructor = false;
// If the destructor is explicitly defaulted and not trivial or not public
// or if the destructor is deleted, we clear HasIrrelevantDestructor in
// finishedDefaultedOrDeletedMember.
// C++11 [class.dtor]p5:
// A destructor is trivial if [...] the destructor is not virtual.
if (DD->isVirtual())
data().HasTrivialSpecialMembers &= ~SMF_Destructor;
}
// Handle member functions.
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isCopyAssignmentOperator()) {
SMKind |= SMF_CopyAssignment;
const ReferenceType *ParamTy =
Method->getParamDecl(0)->getType()->getAs<ReferenceType>();
if (!ParamTy || ParamTy->getPointeeType().isConstQualified())
data().HasDeclaredCopyAssignmentWithConstParam = true;
}
if (Method->isMoveAssignmentOperator())
SMKind |= SMF_MoveAssignment;
// Keep the list of conversion functions up-to-date.
if (CXXConversionDecl *Conversion = dyn_cast<CXXConversionDecl>(D)) {
// FIXME: We use the 'unsafe' accessor for the access specifier here,
// because Sema may not have set it yet. That's really just a misdesign
// in Sema. However, LLDB *will* have set the access specifier correctly,
// and adds declarations after the class is technically completed,
// so completeDefinition()'s overriding of the access specifiers doesn't
// work.
AccessSpecifier AS = Conversion->getAccessUnsafe();
if (Conversion->getPrimaryTemplate()) {
// We don't record specializations.
} else {
ASTContext &Ctx = getASTContext();
ASTUnresolvedSet &Conversions = data().Conversions.get(Ctx);
NamedDecl *Primary =
FunTmpl ? cast<NamedDecl>(FunTmpl) : cast<NamedDecl>(Conversion);
if (Primary->getPreviousDecl())
Conversions.replace(cast<NamedDecl>(Primary->getPreviousDecl()),
Primary, AS);
else
Conversions.addDecl(Ctx, Primary, AS);
}
}
if (SMKind) {
// If this is the first declaration of a special member, we no longer have
// an implicit trivial special member.
data().HasTrivialSpecialMembers &=
data().DeclaredSpecialMembers | ~SMKind;
if (!Method->isImplicit() && !Method->isUserProvided()) {
// This method is user-declared but not user-provided. We can't work out
// whether it's trivial yet (not until we get to the end of the class).
// We'll handle this method in finishedDefaultedOrDeletedMember.
} else if (Method->isTrivial())
data().HasTrivialSpecialMembers |= SMKind;
else
data().DeclaredNonTrivialSpecialMembers |= SMKind;
// Note when we have declared a special member, and suppress the
// implicit declaration of this special member.
data().DeclaredSpecialMembers |= SMKind;
if (!Method->isImplicit()) {
data().UserDeclaredSpecialMembers |= SMKind;
// C++03 [class]p4:
// A POD-struct is an aggregate class that has [...] no user-defined
// copy assignment operator and no user-defined destructor.
//
// Since the POD bit is meant to be C++03 POD-ness, and in C++03,
// aggregates could not have any constructors, clear it even for an
// explicitly defaulted or deleted constructor.
//
// Also, a user-declared move assignment operator makes a class non-POD.
// This is an extension in C++03.
data().PlainOldData = false;
}
}
return;
}
// Handle non-static data members.
if (FieldDecl *Field = dyn_cast<FieldDecl>(D)) {
// C++ [class.bit]p2:
// A declaration for a bit-field that omits the identifier declares an
// unnamed bit-field. Unnamed bit-fields are not members and cannot be
// initialized.
if (Field->isUnnamedBitfield())
return;
// C++ [dcl.init.aggr]p1:
// An aggregate is an array or a class (clause 9) with [...] no
// private or protected non-static data members (clause 11).
//
// A POD must be an aggregate.
if (D->getAccess() == AS_private || D->getAccess() == AS_protected) {
data().Aggregate = false;
data().PlainOldData = false;
}
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- has the same access control for all non-static data members,
switch (D->getAccess()) {
case AS_private: data().HasPrivateFields = true; break;
case AS_protected: data().HasProtectedFields = true; break;
case AS_public: data().HasPublicFields = true; break;
case AS_none: llvm_unreachable("Invalid access specifier");
}
if ((data().HasPrivateFields + data().HasProtectedFields +
data().HasPublicFields) > 1)
data().IsStandardLayout = false;
// Keep track of the presence of mutable fields.
- if (Field->isMutable())
+ if (Field->isMutable()) {
data().HasMutableFields = true;
+ data().NeedOverloadResolutionForCopyConstructor = true;
+ }
// C++11 [class.union]p8, DR1460:
// If X is a union, a non-static data member of X that is not an anonymous
// union is a variant member of X.
if (isUnion() && !Field->isAnonymousStructOrUnion())
data().HasVariantMembers = true;
// C++0x [class]p9:
// A POD struct is a class that is both a trivial class and a
// standard-layout class, and has no non-static data members of type
// non-POD struct, non-POD union (or array of such types).
//
// Automatic Reference Counting: the presence of a member of Objective-C pointer type
// that does not explicitly have no lifetime makes the class a non-POD.
ASTContext &Context = getASTContext();
QualType T = Context.getBaseElementType(Field->getType());
if (T->isObjCRetainableType() || T.isObjCGCStrong()) {
if (T.hasNonTrivialObjCLifetime()) {
// Objective-C Automatic Reference Counting:
// If a class has a non-static data member of Objective-C pointer
// type (or array thereof), it is a non-POD type and its
// default constructor (if any), copy constructor, move constructor,
// copy assignment operator, move assignment operator, and destructor are
// non-trivial.
setHasObjectMember(true);
struct DefinitionData &Data = data();
Data.PlainOldData = false;
Data.HasTrivialSpecialMembers = 0;
Data.HasIrrelevantDestructor = false;
} else if (!Context.getLangOpts().ObjCAutoRefCount) {
setHasObjectMember(true);
}
} else if (!T.isCXX98PODType(Context))
data().PlainOldData = false;
if (T->isReferenceType()) {
if (!Field->hasInClassInitializer())
data().HasUninitializedReferenceMember = true;
// C++0x [class]p7:
// A standard-layout class is a class that:
// -- has no non-static data members of type [...] reference,
data().IsStandardLayout = false;
+
+ // C++1z [class.copy.ctor]p10:
+ // A defaulted copy constructor for a class X is defined as deleted if X has:
+ // -- a non-static data member of rvalue reference type
+ if (T->isRValueReferenceType())
+ data().DefaultedCopyConstructorIsDeleted = true;
}
if (!Field->hasInClassInitializer() && !Field->isMutable()) {
if (CXXRecordDecl *FieldType = T->getAsCXXRecordDecl()) {
if (FieldType->hasDefinition() && !FieldType->allowConstDefaultInit())
data().HasUninitializedFields = true;
} else {
data().HasUninitializedFields = true;
}
}
// Record if this field is the first non-literal or volatile field or base.
if (!T->isLiteralType(Context) || T.isVolatileQualified())
data().HasNonLiteralTypeFieldsOrBases = true;
if (Field->hasInClassInitializer() ||
(Field->isAnonymousStructOrUnion() &&
Field->getType()->getAsCXXRecordDecl()->hasInClassInitializer())) {
data().HasInClassInitializer = true;
// C++11 [class]p5:
// A default constructor is trivial if [...] no non-static data member
// of its class has a brace-or-equal-initializer.
data().HasTrivialSpecialMembers &= ~SMF_DefaultConstructor;
// C++11 [dcl.init.aggr]p1:
// An aggregate is a [...] class with [...] no
// brace-or-equal-initializers for non-static data members.
//
// This rule was removed in C++14.
if (!getASTContext().getLangOpts().CPlusPlus14)
data().Aggregate = false;
// C++11 [class]p10:
// A POD struct is [...] a trivial class.
data().PlainOldData = false;
}
// C++11 [class.copy]p23:
// A defaulted copy/move assignment operator for a class X is defined
// as deleted if X has:
// -- a non-static data member of reference type
if (T->isReferenceType())
data().DefaultedMoveAssignmentIsDeleted = true;
if (const RecordType *RecordTy = T->getAs<RecordType>()) {
CXXRecordDecl* FieldRec = cast<CXXRecordDecl>(RecordTy->getDecl());
if (FieldRec->getDefinition()) {
addedClassSubobject(FieldRec);
// We may need to perform overload resolution to determine whether a
// field can be moved if it's const or volatile qualified.
if (T.getCVRQualifiers() & (Qualifiers::Const | Qualifiers::Volatile)) {
+ // We need to care about 'const' for the copy constructor because an
+ // implicit copy constructor might be declared with a non-const
+ // parameter.
+ data().NeedOverloadResolutionForCopyConstructor = true;
data().NeedOverloadResolutionForMoveConstructor = true;
data().NeedOverloadResolutionForMoveAssignment = true;
}
// C++11 [class.ctor]p5, C++11 [class.copy]p11:
// A defaulted [special member] for a class X is defined as
// deleted if:
// -- X is a union-like class that has a variant member with a
// non-trivial [corresponding special member]
if (isUnion()) {
+ if (FieldRec->hasNonTrivialCopyConstructor())
+ data().DefaultedCopyConstructorIsDeleted = true;
if (FieldRec->hasNonTrivialMoveConstructor())
data().DefaultedMoveConstructorIsDeleted = true;
if (FieldRec->hasNonTrivialMoveAssignment())
data().DefaultedMoveAssignmentIsDeleted = true;
if (FieldRec->hasNonTrivialDestructor())
data().DefaultedDestructorIsDeleted = true;
}
// For an anonymous union member, our overload resolution will perform
// overload resolution for its members.
if (Field->isAnonymousStructOrUnion()) {
+ data().NeedOverloadResolutionForCopyConstructor |=
+ FieldRec->data().NeedOverloadResolutionForCopyConstructor;
data().NeedOverloadResolutionForMoveConstructor |=
FieldRec->data().NeedOverloadResolutionForMoveConstructor;
data().NeedOverloadResolutionForMoveAssignment |=
FieldRec->data().NeedOverloadResolutionForMoveAssignment;
data().NeedOverloadResolutionForDestructor |=
FieldRec->data().NeedOverloadResolutionForDestructor;
}
// C++0x [class.ctor]p5:
// A default constructor is trivial [...] if:
// -- for all the non-static data members of its class that are of
// class type (or array thereof), each such class has a trivial
// default constructor.
if (!FieldRec->hasTrivialDefaultConstructor())
data().HasTrivialSpecialMembers &= ~SMF_DefaultConstructor;
// C++0x [class.copy]p13:
// A copy/move constructor for class X is trivial if [...]
// [...]
// -- for each non-static data member of X that is of class type (or
// an array thereof), the constructor selected to copy/move that
// member is trivial;
if (!FieldRec->hasTrivialCopyConstructor())
data().HasTrivialSpecialMembers &= ~SMF_CopyConstructor;
// If the field doesn't have a simple move constructor, we'll eagerly
// declare the move constructor for this class and we'll decide whether
// it's trivial then.
if (!FieldRec->hasTrivialMoveConstructor())
data().HasTrivialSpecialMembers &= ~SMF_MoveConstructor;
// C++0x [class.copy]p27:
// A copy/move assignment operator for class X is trivial if [...]
// [...]
// -- for each non-static data member of X that is of class type (or
// an array thereof), the assignment operator selected to
// copy/move that member is trivial;
if (!FieldRec->hasTrivialCopyAssignment())
data().HasTrivialSpecialMembers &= ~SMF_CopyAssignment;
// If the field doesn't have a simple move assignment, we'll eagerly
// declare the move assignment for this class and we'll decide whether
// it's trivial then.
if (!FieldRec->hasTrivialMoveAssignment())
data().HasTrivialSpecialMembers &= ~SMF_MoveAssignment;
if (!FieldRec->hasTrivialDestructor())
data().HasTrivialSpecialMembers &= ~SMF_Destructor;
if (!FieldRec->hasIrrelevantDestructor())
data().HasIrrelevantDestructor = false;
if (FieldRec->hasObjectMember())
setHasObjectMember(true);
if (FieldRec->hasVolatileMember())
setHasVolatileMember(true);
// C++0x [class]p7:
// A standard-layout class is a class that:
// -- has no non-static data members of type non-standard-layout
// class (or array of such types) [...]
if (!FieldRec->isStandardLayout())
data().IsStandardLayout = false;
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- has no base classes of the same type as the first non-static
// data member.
// We don't want to expend bits in the state of the record decl
// tracking whether this is the first non-static data member so we
// cheat a bit and use some of the existing state: the empty bit.
// Virtual bases and virtual methods make a class non-empty, but they
// also make it non-standard-layout so we needn't check here.
// A non-empty base class may leave the class standard-layout, but not
// if we have arrived here, and have at least one non-static data
// member. If IsStandardLayout remains true, then the first non-static
// data member must come through here with Empty still true, and Empty
// will subsequently be set to false below.
if (data().IsStandardLayout && data().Empty) {
for (const auto &BI : bases()) {
if (Context.hasSameUnqualifiedType(BI.getType(), T)) {
data().IsStandardLayout = false;
break;
}
}
}
// Keep track of the presence of mutable fields.
- if (FieldRec->hasMutableFields())
+ if (FieldRec->hasMutableFields()) {
data().HasMutableFields = true;
+ data().NeedOverloadResolutionForCopyConstructor = true;
+ }
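// Illustrative sketch (an assumption for exposition): a mutable member
// is copied as non-const even from a const source object, e.g.
//   struct M { mutable int x; };
//   struct S { M m; };
// so the constructor selected to copy 'm' may differ from the one a
// plain const member would pick; hence the flag set above.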
// C++11 [class.copy]p13:
// If the implicitly-defined constructor would satisfy the
// requirements of a constexpr constructor, the implicitly-defined
// constructor is constexpr.
// C++11 [dcl.constexpr]p4:
// -- every constructor involved in initializing non-static data
// members [...] shall be a constexpr constructor
if (!Field->hasInClassInitializer() &&
!FieldRec->hasConstexprDefaultConstructor() && !isUnion())
// The standard requires any in-class initializer to be a constant
// expression. We consider this to be a defect.
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++11 [class.copy]p8:
// The implicitly-declared copy constructor for a class X will have
// the form 'X::X(const X&)' if each potentially constructed subobject
// of a class type M (or array thereof) has a copy constructor whose
// first parameter is of type 'const M&' or 'const volatile M&'.
if (!FieldRec->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorCanHaveConstParamForNonVBase = false;
// C++11 [class.copy]p18:
// The implicitly-declared copy assignment operator for a class X will
// have the form 'X& X::operator=(const X&)' if [...] for all the
// non-static data members of X that are of a class type M (or array
// thereof), each such class type has a copy assignment operator whose
// parameter is of type 'const M&', 'const volatile M&' or 'M'.
if (!FieldRec->hasCopyAssignmentWithConstParam())
data().ImplicitCopyAssignmentHasConstParam = false;
if (FieldRec->hasUninitializedReferenceMember() &&
!Field->hasInClassInitializer())
data().HasUninitializedReferenceMember = true;
// C++11 [class.union]p8, DR1460:
// a non-static data member of an anonymous union that is a member of
// X is also a variant member of X.
if (FieldRec->hasVariantMembers() &&
Field->isAnonymousStructOrUnion())
data().HasVariantMembers = true;
}
} else {
// Base element type of field is a non-class type.
if (!T->isLiteralType(Context) ||
(!Field->hasInClassInitializer() && !isUnion()))
data().DefaultedDefaultConstructorIsConstexpr = false;
// C++11 [class.copy]p23:
// A defaulted copy/move assignment operator for a class X is defined
// as deleted if X has:
// -- a non-static data member of const non-class type (or array
// thereof)
if (T.isConstQualified())
data().DefaultedMoveAssignmentIsDeleted = true;
}
// C++0x [class]p7:
// A standard-layout class is a class that:
// [...]
// -- either has no non-static data members in the most derived
// class and at most one base class with non-static data members,
// or has no base classes with non-static data members, and
// At this point we know that we have a non-static data member, so the last
// clause holds.
if (!data().HasNoNonEmptyBases)
data().IsStandardLayout = false;
// C++14 [meta.unary.prop]p4:
// T is a class type [...] with [...] no non-static data members other
// than bit-fields of length 0...
if (data().Empty) {
if (!Field->isBitField() ||
(!Field->getBitWidth()->isTypeDependent() &&
!Field->getBitWidth()->isValueDependent() &&
Field->getBitWidthValue(Context) != 0))
data().Empty = false;
}
}
// Handle using declarations of conversion functions.
if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(D)) {
if (Shadow->getDeclName().getNameKind()
== DeclarationName::CXXConversionFunctionName) {
ASTContext &Ctx = getASTContext();
data().Conversions.get(Ctx).addDecl(Ctx, Shadow, Shadow->getAccess());
}
}
if (UsingDecl *Using = dyn_cast<UsingDecl>(D)) {
if (Using->getDeclName().getNameKind() ==
DeclarationName::CXXConstructorName) {
data().HasInheritedConstructor = true;
// C++1z [dcl.init.aggr]p1:
// An aggregate is [...] a class [...] with no inherited constructors
data().Aggregate = false;
}
if (Using->getDeclName().getCXXOverloadedOperator() == OO_Equal)
data().HasInheritedAssignment = true;
}
}
void CXXRecordDecl::finishedDefaultedOrDeletedMember(CXXMethodDecl *D) {
assert(!D->isImplicit() && !D->isUserProvided());
// The kind of special member this declaration is, if any.
unsigned SMKind = 0;
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
if (Constructor->isDefaultConstructor()) {
SMKind |= SMF_DefaultConstructor;
if (Constructor->isConstexpr())
data().HasConstexprDefaultConstructor = true;
}
if (Constructor->isCopyConstructor())
SMKind |= SMF_CopyConstructor;
else if (Constructor->isMoveConstructor())
SMKind |= SMF_MoveConstructor;
else if (Constructor->isConstexpr())
// We may now know that the constructor is constexpr.
data().HasConstexprNonCopyMoveConstructor = true;
} else if (isa<CXXDestructorDecl>(D)) {
SMKind |= SMF_Destructor;
if (!D->isTrivial() || D->getAccess() != AS_public || D->isDeleted())
data().HasIrrelevantDestructor = false;
} else if (D->isCopyAssignmentOperator())
SMKind |= SMF_CopyAssignment;
else if (D->isMoveAssignmentOperator())
SMKind |= SMF_MoveAssignment;
// Update which trivial / non-trivial special members we have.
// addedMember will have skipped this step for this member.
if (D->isTrivial())
data().HasTrivialSpecialMembers |= SMKind;
else
data().DeclaredNonTrivialSpecialMembers |= SMKind;
}
bool CXXRecordDecl::isCLike() const {
if (getTagKind() == TTK_Class || getTagKind() == TTK_Interface ||
!TemplateOrInstantiation.isNull())
return false;
if (!hasDefinition())
return true;
return isPOD() && data().HasOnlyCMembers;
}
bool CXXRecordDecl::isGenericLambda() const {
if (!isLambda()) return false;
return getLambdaData().IsGenericLambda;
}
CXXMethodDecl* CXXRecordDecl::getLambdaCallOperator() const {
if (!isLambda()) return nullptr;
DeclarationName Name =
getASTContext().DeclarationNames.getCXXOperatorName(OO_Call);
DeclContext::lookup_result Calls = lookup(Name);
assert(!Calls.empty() && "Missing lambda call operator!");
assert(Calls.size() == 1 && "More than one lambda call operator!");
NamedDecl *CallOp = Calls.front();
if (FunctionTemplateDecl *CallOpTmpl =
dyn_cast<FunctionTemplateDecl>(CallOp))
return cast<CXXMethodDecl>(CallOpTmpl->getTemplatedDecl());
return cast<CXXMethodDecl>(CallOp);
}
CXXMethodDecl* CXXRecordDecl::getLambdaStaticInvoker() const {
if (!isLambda()) return nullptr;
DeclarationName Name =
&getASTContext().Idents.get(getLambdaStaticInvokerName());
DeclContext::lookup_result Invoker = lookup(Name);
if (Invoker.empty()) return nullptr;
assert(Invoker.size() == 1 && "More than one static invoker operator!");
NamedDecl *InvokerFun = Invoker.front();
if (FunctionTemplateDecl *InvokerTemplate =
dyn_cast<FunctionTemplateDecl>(InvokerFun))
return cast<CXXMethodDecl>(InvokerTemplate->getTemplatedDecl());
return cast<CXXMethodDecl>(InvokerFun);
}
void CXXRecordDecl::getCaptureFields(
llvm::DenseMap<const VarDecl *, FieldDecl *> &Captures,
FieldDecl *&ThisCapture) const {
Captures.clear();
ThisCapture = nullptr;
LambdaDefinitionData &Lambda = getLambdaData();
RecordDecl::field_iterator Field = field_begin();
for (const LambdaCapture *C = Lambda.Captures, *CEnd = C + Lambda.NumCaptures;
C != CEnd; ++C, ++Field) {
if (C->capturesThis())
ThisCapture = *Field;
else if (C->capturesVariable())
Captures[C->getCapturedVar()] = *Field;
}
assert(Field == field_end());
}
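// Illustrative use (a sketch; 'Closure' is a hypothetical CXXRecordDecl*
// for a lambda):
//   llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
//   FieldDecl *ThisCapture = nullptr;
//   Closure->getCaptureFields(Captures, ThisCapture);
// Afterwards, Captures maps each captured variable to the closure-type
// field that stores it, and ThisCapture names the field holding a
// captured 'this', if any.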
TemplateParameterList *
CXXRecordDecl::getGenericLambdaTemplateParameterList() const {
if (!isLambda()) return nullptr;
CXXMethodDecl *CallOp = getLambdaCallOperator();
if (FunctionTemplateDecl *Tmpl = CallOp->getDescribedFunctionTemplate())
return Tmpl->getTemplateParameters();
return nullptr;
}
Decl *CXXRecordDecl::getLambdaContextDecl() const {
assert(isLambda() && "Not a lambda closure type!");
ExternalASTSource *Source = getParentASTContext().getExternalSource();
return getLambdaData().ContextDecl.get(Source);
}
static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) {
QualType T =
cast<CXXConversionDecl>(Conv->getUnderlyingDecl()->getAsFunction())
->getConversionType();
return Context.getCanonicalType(T);
}
/// Collect the visible conversions of a base class.
///
/// \param Record a base class of the class we're considering
/// \param InVirtual whether this base class is a virtual base (or a base
/// of a virtual base)
/// \param Access the access along the inheritance path to this base
/// \param ParentHiddenTypes the conversions provided by the inheritors
/// of this base
/// \param Output the set to which to add conversions from non-virtual bases
/// \param VOutput the set to which to add conversions from virtual bases
/// \param HiddenVBaseCs the set of conversions which were hidden in a
/// virtual base along some inheritance path
static void CollectVisibleConversions(ASTContext &Context,
CXXRecordDecl *Record,
bool InVirtual,
AccessSpecifier Access,
const llvm::SmallPtrSet<CanQualType, 8> &ParentHiddenTypes,
ASTUnresolvedSet &Output,
UnresolvedSetImpl &VOutput,
llvm::SmallPtrSet<NamedDecl*, 8> &HiddenVBaseCs) {
// The set of types which have conversions in this class or its
// subclasses. As an optimization, we don't copy the derived set
// unless it might change.
const llvm::SmallPtrSet<CanQualType, 8> *HiddenTypes = &ParentHiddenTypes;
llvm::SmallPtrSet<CanQualType, 8> HiddenTypesBuffer;
// Collect the direct conversions and figure out which conversions
// will be hidden in the subclasses.
CXXRecordDecl::conversion_iterator ConvI = Record->conversion_begin();
CXXRecordDecl::conversion_iterator ConvE = Record->conversion_end();
if (ConvI != ConvE) {
HiddenTypesBuffer = ParentHiddenTypes;
HiddenTypes = &HiddenTypesBuffer;
for (CXXRecordDecl::conversion_iterator I = ConvI; I != ConvE; ++I) {
CanQualType ConvType(GetConversionType(Context, I.getDecl()));
bool Hidden = ParentHiddenTypes.count(ConvType);
if (!Hidden)
HiddenTypesBuffer.insert(ConvType);
// If this conversion is hidden and we're in a virtual base,
// remember that it's hidden along some inheritance path.
if (Hidden && InVirtual)
HiddenVBaseCs.insert(cast<NamedDecl>(I.getDecl()->getCanonicalDecl()));
// If this conversion isn't hidden, add it to the appropriate output.
else if (!Hidden) {
AccessSpecifier IAccess
= CXXRecordDecl::MergeAccess(Access, I.getAccess());
if (InVirtual)
VOutput.addDecl(I.getDecl(), IAccess);
else
Output.addDecl(Context, I.getDecl(), IAccess);
}
}
}
// Collect information recursively from any base classes.
for (const auto &I : Record->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT) continue;
AccessSpecifier BaseAccess
= CXXRecordDecl::MergeAccess(Access, I.getAccessSpecifier());
bool BaseInVirtual = InVirtual || I.isVirtual();
CXXRecordDecl *Base = cast<CXXRecordDecl>(RT->getDecl());
CollectVisibleConversions(Context, Base, BaseInVirtual, BaseAccess,
*HiddenTypes, Output, VOutput, HiddenVBaseCs);
}
}
/// Collect the visible conversions of a class.
///
/// This would be extremely straightforward if it weren't for virtual
/// bases. It might be worth special-casing that, really.
static void CollectVisibleConversions(ASTContext &Context,
CXXRecordDecl *Record,
ASTUnresolvedSet &Output) {
// The collection of all conversions in virtual bases that we've
// found. These will be added to the output as long as they don't
// appear in the hidden-conversions set.
UnresolvedSet<8> VBaseCs;
// The set of conversions in virtual bases that we've determined to
// be hidden.
llvm::SmallPtrSet<NamedDecl*, 8> HiddenVBaseCs;
// The set of types hidden by classes derived from this one.
llvm::SmallPtrSet<CanQualType, 8> HiddenTypes;
// Go ahead and collect the direct conversions and add them to the
// hidden-types set.
CXXRecordDecl::conversion_iterator ConvI = Record->conversion_begin();
CXXRecordDecl::conversion_iterator ConvE = Record->conversion_end();
Output.append(Context, ConvI, ConvE);
for (; ConvI != ConvE; ++ConvI)
HiddenTypes.insert(GetConversionType(Context, ConvI.getDecl()));
// Recursively collect conversions from base classes.
for (const auto &I : Record->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT) continue;
CollectVisibleConversions(Context, cast<CXXRecordDecl>(RT->getDecl()),
I.isVirtual(), I.getAccessSpecifier(),
HiddenTypes, Output, VBaseCs, HiddenVBaseCs);
}
// Add any unhidden conversions provided by virtual bases.
for (UnresolvedSetIterator I = VBaseCs.begin(), E = VBaseCs.end();
I != E; ++I) {
if (!HiddenVBaseCs.count(cast<NamedDecl>(I.getDecl()->getCanonicalDecl())))
Output.addDecl(Context, I.getDecl(), I.getAccess());
}
}
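// Illustrative example (not part of this source):
//   struct B { operator int(); };
//   struct D : B { operator int(); };
// D::operator int hides B::operator int, so only the derived conversion
// ends up in the visible set; the HiddenTypes bookkeeping above applies
// the same rule across (virtual) bases.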
/// getVisibleConversionFunctions - get all conversion functions visible
/// in the current class, including conversion function templates.
llvm::iterator_range<CXXRecordDecl::conversion_iterator>
CXXRecordDecl::getVisibleConversionFunctions() {
ASTContext &Ctx = getASTContext();
ASTUnresolvedSet *Set;
if (bases_begin() == bases_end()) {
// If root class, all conversions are visible.
Set = &data().Conversions.get(Ctx);
} else {
Set = &data().VisibleConversions.get(Ctx);
// If visible conversion list is not evaluated, evaluate it.
if (!data().ComputedVisibleConversions) {
CollectVisibleConversions(Ctx, this, *Set);
data().ComputedVisibleConversions = true;
}
}
return llvm::make_range(Set->begin(), Set->end());
}
void CXXRecordDecl::removeConversion(const NamedDecl *ConvDecl) {
// This operation is O(N) but extremely rare. Sema only uses it to
// remove UsingShadowDecls in a class that were followed by a direct
// declaration, e.g.:
// class A : B {
// using B::operator int;
// operator int();
// };
// This is uncommon by itself and even more uncommon in conjunction
// with sufficiently large numbers of directly-declared conversions
// that asymptotic behavior matters.
ASTUnresolvedSet &Convs = data().Conversions.get(getASTContext());
for (unsigned I = 0, E = Convs.size(); I != E; ++I) {
if (Convs[I].getDecl() == ConvDecl) {
Convs.erase(I);
assert(std::find(Convs.begin(), Convs.end(), ConvDecl) == Convs.end()
&& "conversion was found multiple times in unresolved set");
return;
}
}
llvm_unreachable("conversion not found in set!");
}
CXXRecordDecl *CXXRecordDecl::getInstantiatedFromMemberClass() const {
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo())
return cast<CXXRecordDecl>(MSInfo->getInstantiatedFrom());
return nullptr;
}
MemberSpecializationInfo *CXXRecordDecl::getMemberSpecializationInfo() const {
return TemplateOrInstantiation.dyn_cast<MemberSpecializationInfo *>();
}
void
CXXRecordDecl::setInstantiationOfMemberClass(CXXRecordDecl *RD,
TemplateSpecializationKind TSK) {
assert(TemplateOrInstantiation.isNull() &&
"Previous template or instantiation?");
assert(!isa<ClassTemplatePartialSpecializationDecl>(this));
TemplateOrInstantiation
= new (getASTContext()) MemberSpecializationInfo(RD, TSK);
}
ClassTemplateDecl *CXXRecordDecl::getDescribedClassTemplate() const {
return TemplateOrInstantiation.dyn_cast<ClassTemplateDecl *>();
}
void CXXRecordDecl::setDescribedClassTemplate(ClassTemplateDecl *Template) {
TemplateOrInstantiation = Template;
}
TemplateSpecializationKind CXXRecordDecl::getTemplateSpecializationKind() const {
if (const ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(this))
return Spec->getSpecializationKind();
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo())
return MSInfo->getTemplateSpecializationKind();
return TSK_Undeclared;
}
void
CXXRecordDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK) {
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(this)) {
Spec->setSpecializationKind(TSK);
return;
}
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo()) {
MSInfo->setTemplateSpecializationKind(TSK);
return;
}
llvm_unreachable("Not a class template or member class specialization");
}
const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const {
auto GetDefinitionOrSelf =
[](const CXXRecordDecl *D) -> const CXXRecordDecl * {
if (auto *Def = D->getDefinition())
return Def;
return D;
};
// If it's a class template specialization, find the template or partial
// specialization from which it was instantiated.
if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(this)) {
auto From = TD->getInstantiatedFrom();
if (auto *CTD = From.dyn_cast<ClassTemplateDecl *>()) {
while (auto *NewCTD = CTD->getInstantiatedFromMemberTemplate()) {
if (NewCTD->isMemberSpecialization())
break;
CTD = NewCTD;
}
return GetDefinitionOrSelf(CTD->getTemplatedDecl());
}
if (auto *CTPSD =
From.dyn_cast<ClassTemplatePartialSpecializationDecl *>()) {
while (auto *NewCTPSD = CTPSD->getInstantiatedFromMember()) {
if (NewCTPSD->isMemberSpecialization())
break;
CTPSD = NewCTPSD;
}
return GetDefinitionOrSelf(CTPSD);
}
}
if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo()) {
if (isTemplateInstantiation(MSInfo->getTemplateSpecializationKind())) {
const CXXRecordDecl *RD = this;
while (auto *NewRD = RD->getInstantiatedFromMemberClass())
RD = NewRD;
return GetDefinitionOrSelf(RD);
}
}
assert(!isTemplateInstantiation(this->getTemplateSpecializationKind()) &&
"couldn't find pattern for class template instantiation");
return nullptr;
}
CXXDestructorDecl *CXXRecordDecl::getDestructor() const {
ASTContext &Context = getASTContext();
QualType ClassType = Context.getTypeDeclType(this);
DeclarationName Name
= Context.DeclarationNames.getCXXDestructorName(
Context.getCanonicalType(ClassType));
DeclContext::lookup_result R = lookup(Name);
return R.empty() ? nullptr : dyn_cast<CXXDestructorDecl>(R.front());
}
bool CXXRecordDecl::isAnyDestructorNoReturn() const {
// Destructor is noreturn.
if (const CXXDestructorDecl *Destructor = getDestructor())
if (Destructor->isNoReturn())
return true;
// Check base class destructors for noreturn.
for (const auto &Base : bases())
if (const CXXRecordDecl *RD = Base.getType()->getAsCXXRecordDecl())
if (RD->isAnyDestructorNoReturn())
return true;
// Check fields for noreturn.
for (const auto *Field : fields())
if (const CXXRecordDecl *RD =
Field->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl())
if (RD->isAnyDestructorNoReturn())
return true;
// No destructor is noreturn.
return false;
}
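// Illustrative example (not part of this source):
//   struct A { [[noreturn]] ~A(); };
//   struct B { A a; };
// isAnyDestructorNoReturn() is true for B: its own destructor is not
// noreturn, but the field walk above finds A's.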
void CXXRecordDecl::completeDefinition() {
completeDefinition(nullptr);
}
void CXXRecordDecl::completeDefinition(CXXFinalOverriderMap *FinalOverriders) {
RecordDecl::completeDefinition();
-
+
// If the class may be abstract (but hasn't been marked as such), check for
// any pure final overriders.
if (mayBeAbstract()) {
CXXFinalOverriderMap MyFinalOverriders;
if (!FinalOverriders) {
getFinalOverriders(MyFinalOverriders);
FinalOverriders = &MyFinalOverriders;
}
bool Done = false;
for (CXXFinalOverriderMap::iterator M = FinalOverriders->begin(),
MEnd = FinalOverriders->end();
M != MEnd && !Done; ++M) {
for (OverridingMethods::iterator SO = M->second.begin(),
SOEnd = M->second.end();
SO != SOEnd && !Done; ++SO) {
assert(SO->second.size() > 0 &&
"All virtual functions have overridding virtual functions");
// C++ [class.abstract]p4:
// A class is abstract if it contains or inherits at least one
// pure virtual function for which the final overrider is pure
// virtual.
if (SO->second.front().Method->isPure()) {
data().Abstract = true;
Done = true;
break;
}
}
}
}
// Set access bits correctly on the directly-declared conversions.
for (conversion_iterator I = conversion_begin(), E = conversion_end();
I != E; ++I)
I.setAccess((*I)->getAccess());
}
bool CXXRecordDecl::mayBeAbstract() const {
if (data().Abstract || isInvalidDecl() || !data().Polymorphic ||
isDependentContext())
return false;
for (const auto &B : bases()) {
CXXRecordDecl *BaseDecl
= cast<CXXRecordDecl>(B.getType()->getAs<RecordType>()->getDecl());
if (BaseDecl->isAbstract())
return true;
}
return false;
}
void CXXDeductionGuideDecl::anchor() { }
CXXDeductionGuideDecl *CXXDeductionGuideDecl::Create(
ASTContext &C, DeclContext *DC, SourceLocation StartLoc, bool IsExplicit,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
SourceLocation EndLocation) {
return new (C, DC) CXXDeductionGuideDecl(C, DC, StartLoc, IsExplicit,
NameInfo, T, TInfo, EndLocation);
}
CXXDeductionGuideDecl *CXXDeductionGuideDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) CXXDeductionGuideDecl(C, nullptr, SourceLocation(), false,
DeclarationNameInfo(), QualType(),
nullptr, SourceLocation());
}
void CXXMethodDecl::anchor() { }
bool CXXMethodDecl::isStatic() const {
const CXXMethodDecl *MD = getCanonicalDecl();
if (MD->getStorageClass() == SC_Static)
return true;
OverloadedOperatorKind OOK = getDeclName().getCXXOverloadedOperator();
return isStaticOverloadedOperator(OOK);
}
static bool recursivelyOverrides(const CXXMethodDecl *DerivedMD,
const CXXMethodDecl *BaseMD) {
for (CXXMethodDecl::method_iterator I = DerivedMD->begin_overridden_methods(),
E = DerivedMD->end_overridden_methods(); I != E; ++I) {
const CXXMethodDecl *MD = *I;
if (MD->getCanonicalDecl() == BaseMD->getCanonicalDecl())
return true;
if (recursivelyOverrides(MD, BaseMD))
return true;
}
return false;
}
CXXMethodDecl *
CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
bool MayBeBase) {
if (this->getParent()->getCanonicalDecl() == RD->getCanonicalDecl())
return this;
// Lookup doesn't work for destructors, so handle them separately.
if (isa<CXXDestructorDecl>(this)) {
CXXMethodDecl *MD = RD->getDestructor();
if (MD) {
if (recursivelyOverrides(MD, this))
return MD;
if (MayBeBase && recursivelyOverrides(this, MD))
return MD;
}
return nullptr;
}
for (auto *ND : RD->lookup(getDeclName())) {
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND);
if (!MD)
continue;
if (recursivelyOverrides(MD, this))
return MD;
if (MayBeBase && recursivelyOverrides(this, MD))
return MD;
}
for (const auto &I : RD->bases()) {
const RecordType *RT = I.getType()->getAs<RecordType>();
if (!RT)
continue;
const CXXRecordDecl *Base = cast<CXXRecordDecl>(RT->getDecl());
CXXMethodDecl *T = this->getCorrespondingMethodInClass(Base);
if (T)
return T;
}
return nullptr;
}
CXXMethodDecl *
CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC, bool isInline,
bool isConstexpr, SourceLocation EndLocation) {
return new (C, RD) CXXMethodDecl(CXXMethod, C, RD, StartLoc, NameInfo,
T, TInfo, SC, isInline, isConstexpr,
EndLocation);
}
CXXMethodDecl *CXXMethodDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) CXXMethodDecl(CXXMethod, C, nullptr, SourceLocation(),
DeclarationNameInfo(), QualType(), nullptr,
SC_None, false, false, SourceLocation());
}
CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
bool IsAppleKext) {
assert(isVirtual() && "this method is expected to be virtual");
// When building with -fapple-kext, all calls must go through the vtable since
// the kernel linker can do runtime patching of vtables.
if (IsAppleKext)
return nullptr;
// If the member function is marked 'final', we know that it can't be
// overridden and can therefore devirtualize it unless it's pure virtual.
if (hasAttr<FinalAttr>())
return isPure() ? nullptr : this;
// If Base is unknown, we cannot devirtualize.
if (!Base)
return nullptr;
// If the base expression (after skipping derived-to-base conversions) is a
// class prvalue, then we can devirtualize.
Base = Base->getBestDynamicClassTypeExpr();
if (Base->isRValue() && Base->getType()->isRecordType())
return this;
// If we don't even know what we would call, we can't devirtualize.
const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
if (!BestDynamicDecl)
return nullptr;
// There may be a method corresponding to MD in a derived class.
CXXMethodDecl *DevirtualizedMethod =
getCorrespondingMethodInClass(BestDynamicDecl);
// If that method is pure virtual, we can't devirtualize. If this code is
// reached, the result would be UB, not a direct call to the derived class
// function, and we can't assume the derived class function is defined.
if (DevirtualizedMethod->isPure())
return nullptr;
// If that method is marked final, we can devirtualize it.
if (DevirtualizedMethod->hasAttr<FinalAttr>())
return DevirtualizedMethod;
// Similarly, if the class itself is marked 'final' it can't be overridden
// and we can therefore devirtualize the member function call.
if (BestDynamicDecl->hasAttr<FinalAttr>())
return DevirtualizedMethod;
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Base)) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
if (VD->getType()->isRecordType())
// The variable has record (non-pointer) type, so we know its dynamic
// type exactly and can devirtualize the call.
return DevirtualizedMethod;
return nullptr;
}
// We can devirtualize calls on an object accessed by a class member access
// expression, since by C++11 [basic.life]p6 we know that it can't refer to
// a derived class object constructed in the same location.
if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
return VD->getType()->isRecordType() ? DevirtualizedMethod : nullptr;
// Likewise for calls on an object accessed by a (non-reference) pointer to
// member access.
if (auto *BO = dyn_cast<BinaryOperator>(Base)) {
if (BO->isPtrMemOp()) {
auto *MPT = BO->getRHS()->getType()->castAs<MemberPointerType>();
if (MPT->getPointeeType()->isRecordType())
return DevirtualizedMethod;
}
}
// We can't devirtualize the call.
return nullptr;
}
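// Illustrative devirtualization (not part of this source):
//   struct Base { virtual void f(); };
//   struct Derived final : Base { void f() override; };
//   void g(Derived &d) { d.f(); }
// Here BestDynamicDecl is Derived, which carries FinalAttr, so the call
// can be emitted directly instead of through the vtable.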
bool CXXMethodDecl::isUsualDeallocationFunction() const {
if (getOverloadedOperator() != OO_Delete &&
getOverloadedOperator() != OO_Array_Delete)
return false;
// C++ [basic.stc.dynamic.deallocation]p2:
// A template instance is never a usual deallocation function,
// regardless of its signature.
if (getPrimaryTemplate())
return false;
// C++ [basic.stc.dynamic.deallocation]p2:
// If a class T has a member deallocation function named operator delete
// with exactly one parameter, then that function is a usual (non-placement)
// deallocation function. [...]
if (getNumParams() == 1)
return true;
unsigned UsualParams = 1;
// C++ <=14 [basic.stc.dynamic.deallocation]p2:
// [...] If class T does not declare such an operator delete but does
// declare a member deallocation function named operator delete with
// exactly two parameters, the second of which has type std::size_t (18.1),
// then this function is a usual deallocation function.
//
// C++17 says a usual deallocation function is one with the signature
// (void* [, size_t] [, std::align_val_t] [, ...])
// and all such functions are usual deallocation functions. It's not clear
// that allowing varargs functions was intentional.
ASTContext &Context = getASTContext();
if (UsualParams < getNumParams() &&
Context.hasSameUnqualifiedType(getParamDecl(UsualParams)->getType(),
Context.getSizeType()))
++UsualParams;
if (UsualParams < getNumParams() &&
getParamDecl(UsualParams)->getType()->isAlignValT())
++UsualParams;
if (UsualParams != getNumParams())
return false;
// In C++17 onwards, all potential usual deallocation functions are actual
// usual deallocation functions.
if (Context.getLangOpts().AlignedAllocation)
return true;
// This function is a usual deallocation function if there are no
// single-parameter deallocation functions of the same kind.
DeclContext::lookup_result R = getDeclContext()->lookup(getDeclName());
for (DeclContext::lookup_result::iterator I = R.begin(), E = R.end();
I != E; ++I) {
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(*I))
if (FD->getNumParams() == 1)
return false;
}
return true;
}
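// Illustrative example (pre-C++17 rules, an assumption for the example):
//   struct X {
//     void operator delete(void *);              // usual
//     void operator delete(void *, std::size_t); // not usual here, since
//   };                                           // the 1-param form exists
// With C++17 aligned allocation enabled, both forms would qualify.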
bool CXXMethodDecl::isCopyAssignmentOperator() const {
// C++0x [class.copy]p17:
// A user-declared copy assignment operator X::operator= is a non-static
// non-template member function of class X with exactly one parameter of
// type X, X&, const X&, volatile X& or const volatile X&.
if (/*operator=*/getOverloadedOperator() != OO_Equal ||
/*non-static*/ isStatic() ||
/*non-template*/getPrimaryTemplate() || getDescribedFunctionTemplate() ||
getNumParams() != 1)
return false;
QualType ParamType = getParamDecl(0)->getType();
if (const LValueReferenceType *Ref = ParamType->getAs<LValueReferenceType>())
ParamType = Ref->getPointeeType();
ASTContext &Context = getASTContext();
QualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(getParent()));
return Context.hasSameUnqualifiedType(ClassType, ParamType);
}
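// Illustrative: 'X &operator=(const X &)' and 'X &operator=(X)' both
// satisfy this predicate; 'X &operator=(X &&)' does not (see
// isMoveAssignmentOperator below).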
bool CXXMethodDecl::isMoveAssignmentOperator() const {
// C++0x [class.copy]p19:
// A user-declared move assignment operator X::operator= is a non-static
// non-template member function of class X with exactly one parameter of type
// X&&, const X&&, volatile X&&, or const volatile X&&.
if (getOverloadedOperator() != OO_Equal || isStatic() ||
getPrimaryTemplate() || getDescribedFunctionTemplate() ||
getNumParams() != 1)
return false;
QualType ParamType = getParamDecl(0)->getType();
if (!isa<RValueReferenceType>(ParamType))
return false;
ParamType = ParamType->getPointeeType();
ASTContext &Context = getASTContext();
QualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(getParent()));
return Context.hasSameUnqualifiedType(ClassType, ParamType);
}
void CXXMethodDecl::addOverriddenMethod(const CXXMethodDecl *MD) {
assert(MD->isCanonicalDecl() && "Method is not canonical!");
assert(!MD->getParent()->isDependentContext() &&
"Can't add an overridden method to a class template!");
assert(MD->isVirtual() && "Method is not virtual!");
getASTContext().addOverriddenMethod(this, MD);
}
CXXMethodDecl::method_iterator CXXMethodDecl::begin_overridden_methods() const {
if (isa<CXXConstructorDecl>(this)) return nullptr;
return getASTContext().overridden_methods_begin(this);
}
CXXMethodDecl::method_iterator CXXMethodDecl::end_overridden_methods() const {
if (isa<CXXConstructorDecl>(this)) return nullptr;
return getASTContext().overridden_methods_end(this);
}
unsigned CXXMethodDecl::size_overridden_methods() const {
if (isa<CXXConstructorDecl>(this)) return 0;
return getASTContext().overridden_methods_size(this);
}
CXXMethodDecl::overridden_method_range
CXXMethodDecl::overridden_methods() const {
if (isa<CXXConstructorDecl>(this))
return overridden_method_range(nullptr, nullptr);
return getASTContext().overridden_methods(this);
}
QualType CXXMethodDecl::getThisType(ASTContext &C) const {
// C++ 9.3.2p1: The type of this in a member function of a class X is X*.
// If the member function is declared const, the type of this is const X*,
// if the member function is declared volatile, the type of this is
// volatile X*, and if the member function is declared const volatile,
// the type of this is const volatile X*.
assert(isInstance() && "No 'this' for static methods!");
QualType ClassTy = C.getTypeDeclType(getParent());
ClassTy = C.getQualifiedType(ClassTy,
Qualifiers::fromCVRUMask(getTypeQualifiers()));
return C.getPointerType(ClassTy);
}
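// Illustrative: inside 'void X::f() const volatile', the type computed
// above for 'this' is 'const volatile X *'.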
bool CXXMethodDecl::hasInlineBody() const {
// If this function is a template instantiation, look at the template from
// which it was instantiated.
const FunctionDecl *CheckFn = getTemplateInstantiationPattern();
if (!CheckFn)
CheckFn = this;
const FunctionDecl *fn;
return CheckFn->isDefined(fn) && !fn->isOutOfLine() &&
(fn->doesThisDeclarationHaveABody() || fn->willHaveBody());
}
bool CXXMethodDecl::isLambdaStaticInvoker() const {
const CXXRecordDecl *P = getParent();
if (P->isLambda()) {
if (const CXXMethodDecl *StaticInvoker = P->getLambdaStaticInvoker()) {
if (StaticInvoker == this) return true;
if (P->isGenericLambda() && this->isFunctionTemplateSpecialization())
return StaticInvoker == this->getPrimaryTemplate()->getTemplatedDecl();
}
}
return false;
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
TypeSourceInfo *TInfo, bool IsVirtual,
SourceLocation L, Expr *Init,
SourceLocation R,
SourceLocation EllipsisLoc)
: Initializee(TInfo), MemberOrEllipsisLocation(EllipsisLoc), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(IsVirtual),
IsWritten(false), SourceOrder(0)
{
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
FieldDecl *Member,
SourceLocation MemberLoc,
SourceLocation L, Expr *Init,
SourceLocation R)
: Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
IsWritten(false), SourceOrder(0)
{
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
IndirectFieldDecl *Member,
SourceLocation MemberLoc,
SourceLocation L, Expr *Init,
SourceLocation R)
: Initializee(Member), MemberOrEllipsisLocation(MemberLoc), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(false), IsVirtual(false),
IsWritten(false), SourceOrder(0)
{
}
CXXCtorInitializer::CXXCtorInitializer(ASTContext &Context,
TypeSourceInfo *TInfo,
SourceLocation L, Expr *Init,
SourceLocation R)
: Initializee(TInfo), MemberOrEllipsisLocation(), Init(Init),
LParenLoc(L), RParenLoc(R), IsDelegating(true), IsVirtual(false),
IsWritten(false), SourceOrder(0)
{
}
TypeLoc CXXCtorInitializer::getBaseClassLoc() const {
if (isBaseInitializer())
return Initializee.get<TypeSourceInfo*>()->getTypeLoc();
else
return TypeLoc();
}
const Type *CXXCtorInitializer::getBaseClass() const {
if (isBaseInitializer())
return Initializee.get<TypeSourceInfo*>()->getType().getTypePtr();
else
return nullptr;
}
SourceLocation CXXCtorInitializer::getSourceLocation() const {
if (isInClassMemberInitializer())
return getAnyMember()->getLocation();
if (isAnyMemberInitializer())
return getMemberLocation();
if (TypeSourceInfo *TSInfo = Initializee.get<TypeSourceInfo*>())
return TSInfo->getTypeLoc().getLocalSourceRange().getBegin();
return SourceLocation();
}
SourceRange CXXCtorInitializer::getSourceRange() const {
if (isInClassMemberInitializer()) {
FieldDecl *D = getAnyMember();
if (Expr *I = D->getInClassInitializer())
return I->getSourceRange();
return SourceRange();
}
return SourceRange(getSourceLocation(), getRParenLoc());
}
void CXXConstructorDecl::anchor() { }
CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C,
unsigned ID,
bool Inherited) {
unsigned Extra = additionalSizeToAlloc<InheritedConstructor>(Inherited);
auto *Result = new (C, ID, Extra) CXXConstructorDecl(
C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr,
false, false, false, false, InheritedConstructor());
Result->IsInheritingConstructor = Inherited;
return Result;
}
CXXConstructorDecl *
CXXConstructorDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isExplicit, bool isInline,
bool isImplicitlyDeclared, bool isConstexpr,
InheritedConstructor Inherited) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConstructorName &&
"Name must refer to a constructor");
unsigned Extra =
additionalSizeToAlloc<InheritedConstructor>(Inherited ? 1 : 0);
return new (C, RD, Extra) CXXConstructorDecl(
C, RD, StartLoc, NameInfo, T, TInfo, isExplicit, isInline,
isImplicitlyDeclared, isConstexpr, Inherited);
}
CXXConstructorDecl::init_const_iterator CXXConstructorDecl::init_begin() const {
return CtorInitializers.get(getASTContext().getExternalSource());
}
CXXConstructorDecl *CXXConstructorDecl::getTargetConstructor() const {
assert(isDelegatingConstructor() && "Not a delegating constructor!");
Expr *E = (*init_begin())->getInit()->IgnoreImplicit();
if (CXXConstructExpr *Construct = dyn_cast<CXXConstructExpr>(E))
return Construct->getConstructor();
return nullptr;
}
bool CXXConstructorDecl::isDefaultConstructor() const {
// C++ [class.ctor]p5:
// A default constructor for a class X is a constructor of class
// X that can be called without an argument.
return (getNumParams() == 0) ||
(getNumParams() > 0 && getParamDecl(0)->hasDefaultArg());
}
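// Illustrative: both 'X()' and 'X(int = 0)' satisfy this predicate,
// since each can be called without an argument.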
bool
CXXConstructorDecl::isCopyConstructor(unsigned &TypeQuals) const {
return isCopyOrMoveConstructor(TypeQuals) &&
getParamDecl(0)->getType()->isLValueReferenceType();
}
bool CXXConstructorDecl::isMoveConstructor(unsigned &TypeQuals) const {
return isCopyOrMoveConstructor(TypeQuals) &&
getParamDecl(0)->getType()->isRValueReferenceType();
}
/// \brief Determine whether this is a copy or move constructor.
bool CXXConstructorDecl::isCopyOrMoveConstructor(unsigned &TypeQuals) const {
// C++ [class.copy]p2:
// A non-template constructor for class X is a copy constructor
// if its first parameter is of type X&, const X&, volatile X& or
// const volatile X&, and either there are no other parameters
// or else all other parameters have default arguments (8.3.6).
// C++0x [class.copy]p3:
// A non-template constructor for class X is a move constructor if its
// first parameter is of type X&&, const X&&, volatile X&&, or
// const volatile X&&, and either there are no other parameters or else
// all other parameters have default arguments.
if ((getNumParams() < 1) ||
(getNumParams() > 1 && !getParamDecl(1)->hasDefaultArg()) ||
(getPrimaryTemplate() != nullptr) ||
(getDescribedFunctionTemplate() != nullptr))
return false;
const ParmVarDecl *Param = getParamDecl(0);
// Do we have a reference type?
const ReferenceType *ParamRefType = Param->getType()->getAs<ReferenceType>();
if (!ParamRefType)
return false;
// Is it a reference to our class type?
ASTContext &Context = getASTContext();
CanQualType PointeeType
= Context.getCanonicalType(ParamRefType->getPointeeType());
CanQualType ClassTy
= Context.getCanonicalType(Context.getTagDeclType(getParent()));
if (PointeeType.getUnqualifiedType() != ClassTy)
return false;
// FIXME: other qualifiers?
// We have a copy or move constructor.
TypeQuals = PointeeType.getCVRQualifiers();
return true;
}
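// Illustrative: 'X(const X &, int = 0)' is a copy constructor and sets
// TypeQuals to Qualifiers::Const; 'X(X &&)' is a move constructor with
// TypeQuals of 0.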
bool CXXConstructorDecl::isConvertingConstructor(bool AllowExplicit) const {
// C++ [class.conv.ctor]p1:
// A constructor declared without the function-specifier explicit
// that can be called with a single parameter specifies a
// conversion from the type of its first parameter to the type of
// its class. Such a constructor is called a converting
// constructor.
if (isExplicit() && !AllowExplicit)
return false;
return (getNumParams() == 0 &&
getType()->getAs<FunctionProtoType>()->isVariadic()) ||
(getNumParams() == 1) ||
(getNumParams() > 1 &&
(getParamDecl(1)->hasDefaultArg() ||
getParamDecl(1)->isParameterPack()));
}
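// Illustrative: given 'struct X { X(int); explicit X(double); };',
// X(int) is a converting constructor, while X(double) qualifies only
// when AllowExplicit is true.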
bool CXXConstructorDecl::isSpecializationCopyingObject() const {
if ((getNumParams() < 1) ||
(getNumParams() > 1 && !getParamDecl(1)->hasDefaultArg()) ||
(getDescribedFunctionTemplate() != nullptr))
return false;
const ParmVarDecl *Param = getParamDecl(0);
ASTContext &Context = getASTContext();
CanQualType ParamType = Context.getCanonicalType(Param->getType());
// Is it the same as our class type?
CanQualType ClassTy
= Context.getCanonicalType(Context.getTagDeclType(getParent()));
if (ParamType.getUnqualifiedType() != ClassTy)
return false;
return true;
}
void CXXDestructorDecl::anchor() { }
CXXDestructorDecl *
CXXDestructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID)
CXXDestructorDecl(C, nullptr, SourceLocation(), DeclarationNameInfo(),
QualType(), nullptr, false, false);
}
CXXDestructorDecl *
CXXDestructorDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isImplicitlyDeclared) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXDestructorName &&
"Name must refer to a destructor");
return new (C, RD) CXXDestructorDecl(C, RD, StartLoc, NameInfo, T, TInfo,
isInline, isImplicitlyDeclared);
}
void CXXDestructorDecl::setOperatorDelete(FunctionDecl *OD) {
auto *First = cast<CXXDestructorDecl>(getFirstDecl());
if (OD && !First->OperatorDelete) {
First->OperatorDelete = OD;
if (auto *L = getASTMutationListener())
L->ResolvedOperatorDelete(First, OD);
}
}
void CXXConversionDecl::anchor() { }
CXXConversionDecl *
CXXConversionDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) CXXConversionDecl(C, nullptr, SourceLocation(),
DeclarationNameInfo(), QualType(),
nullptr, false, false, false,
SourceLocation());
}
CXXConversionDecl *
CXXConversionDecl::Create(ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isInline, bool isExplicit,
bool isConstexpr, SourceLocation EndLocation) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConversionFunctionName &&
"Name must refer to a conversion function");
return new (C, RD) CXXConversionDecl(C, RD, StartLoc, NameInfo, T, TInfo,
isInline, isExplicit, isConstexpr,
EndLocation);
}
bool CXXConversionDecl::isLambdaToBlockPointerConversion() const {
return isImplicit() && getParent()->isLambda() &&
getConversionType()->isBlockPointerType();
}
void LinkageSpecDecl::anchor() { }
LinkageSpecDecl *LinkageSpecDecl::Create(ASTContext &C,
DeclContext *DC,
SourceLocation ExternLoc,
SourceLocation LangLoc,
LanguageIDs Lang,
bool HasBraces) {
return new (C, DC) LinkageSpecDecl(DC, ExternLoc, LangLoc, Lang, HasBraces);
}
LinkageSpecDecl *LinkageSpecDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) LinkageSpecDecl(nullptr, SourceLocation(),
SourceLocation(), lang_c, false);
}
void UsingDirectiveDecl::anchor() { }
UsingDirectiveDecl *UsingDirectiveDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
SourceLocation NamespaceLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Used,
DeclContext *CommonAncestor) {
if (NamespaceDecl *NS = dyn_cast_or_null<NamespaceDecl>(Used))
Used = NS->getOriginalNamespace();
return new (C, DC) UsingDirectiveDecl(DC, L, NamespaceLoc, QualifierLoc,
IdentLoc, Used, CommonAncestor);
}
UsingDirectiveDecl *UsingDirectiveDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) UsingDirectiveDecl(nullptr, SourceLocation(),
SourceLocation(),
NestedNameSpecifierLoc(),
SourceLocation(), nullptr, nullptr);
}
NamespaceDecl *UsingDirectiveDecl::getNominatedNamespace() {
if (NamespaceAliasDecl *NA =
dyn_cast_or_null<NamespaceAliasDecl>(NominatedNamespace))
return NA->getNamespace();
return cast_or_null<NamespaceDecl>(NominatedNamespace);
}
NamespaceDecl::NamespaceDecl(ASTContext &C, DeclContext *DC, bool Inline,
SourceLocation StartLoc, SourceLocation IdLoc,
IdentifierInfo *Id, NamespaceDecl *PrevDecl)
: NamedDecl(Namespace, DC, IdLoc, Id), DeclContext(Namespace),
redeclarable_base(C), LocStart(StartLoc), RBraceLoc(),
AnonOrFirstNamespaceAndInline(nullptr, Inline) {
setPreviousDecl(PrevDecl);
if (PrevDecl)
AnonOrFirstNamespaceAndInline.setPointer(PrevDecl->getOriginalNamespace());
}
NamespaceDecl *NamespaceDecl::Create(ASTContext &C, DeclContext *DC,
bool Inline, SourceLocation StartLoc,
SourceLocation IdLoc, IdentifierInfo *Id,
NamespaceDecl *PrevDecl) {
return new (C, DC) NamespaceDecl(C, DC, Inline, StartLoc, IdLoc, Id,
PrevDecl);
}
NamespaceDecl *NamespaceDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) NamespaceDecl(C, nullptr, false, SourceLocation(),
SourceLocation(), nullptr, nullptr);
}
NamespaceDecl *NamespaceDecl::getOriginalNamespace() {
if (isFirstDecl())
return this;
return AnonOrFirstNamespaceAndInline.getPointer();
}
const NamespaceDecl *NamespaceDecl::getOriginalNamespace() const {
if (isFirstDecl())
return this;
return AnonOrFirstNamespaceAndInline.getPointer();
}
bool NamespaceDecl::isOriginalNamespace() const { return isFirstDecl(); }
NamespaceDecl *NamespaceDecl::getNextRedeclarationImpl() {
return getNextRedeclaration();
}
NamespaceDecl *NamespaceDecl::getPreviousDeclImpl() {
return getPreviousDecl();
}
NamespaceDecl *NamespaceDecl::getMostRecentDeclImpl() {
return getMostRecentDecl();
}
void NamespaceAliasDecl::anchor() { }
NamespaceAliasDecl *NamespaceAliasDecl::getNextRedeclarationImpl() {
return getNextRedeclaration();
}
NamespaceAliasDecl *NamespaceAliasDecl::getPreviousDeclImpl() {
return getPreviousDecl();
}
NamespaceAliasDecl *NamespaceAliasDecl::getMostRecentDeclImpl() {
return getMostRecentDecl();
}
NamespaceAliasDecl *NamespaceAliasDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
SourceLocation AliasLoc,
IdentifierInfo *Alias,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation IdentLoc,
NamedDecl *Namespace) {
// FIXME: Preserve the aliased namespace as written.
if (NamespaceDecl *NS = dyn_cast_or_null<NamespaceDecl>(Namespace))
Namespace = NS->getOriginalNamespace();
return new (C, DC) NamespaceAliasDecl(C, DC, UsingLoc, AliasLoc, Alias,
QualifierLoc, IdentLoc, Namespace);
}
NamespaceAliasDecl *
NamespaceAliasDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) NamespaceAliasDecl(C, nullptr, SourceLocation(),
SourceLocation(), nullptr,
NestedNameSpecifierLoc(),
SourceLocation(), nullptr);
}
void UsingShadowDecl::anchor() { }
UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, DeclContext *DC,
SourceLocation Loc, UsingDecl *Using,
NamedDecl *Target)
: NamedDecl(K, DC, Loc, Using ? Using->getDeclName() : DeclarationName()),
redeclarable_base(C), Underlying(Target),
UsingOrNextShadow(cast<NamedDecl>(Using)) {
if (Target)
IdentifierNamespace = Target->getIdentifierNamespace();
setImplicit();
}
UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, EmptyShell Empty)
: NamedDecl(K, nullptr, SourceLocation(), DeclarationName()),
redeclarable_base(C), Underlying(), UsingOrNextShadow() {}
UsingShadowDecl *
UsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UsingShadowDecl(UsingShadow, C, EmptyShell());
}
UsingDecl *UsingShadowDecl::getUsingDecl() const {
const UsingShadowDecl *Shadow = this;
while (const UsingShadowDecl *NextShadow =
dyn_cast<UsingShadowDecl>(Shadow->UsingOrNextShadow))
Shadow = NextShadow;
return cast<UsingDecl>(Shadow->UsingOrNextShadow);
}
void ConstructorUsingShadowDecl::anchor() { }
ConstructorUsingShadowDecl *
ConstructorUsingShadowDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation Loc, UsingDecl *Using,
NamedDecl *Target, bool IsVirtual) {
return new (C, DC) ConstructorUsingShadowDecl(C, DC, Loc, Using, Target,
IsVirtual);
}
ConstructorUsingShadowDecl *
ConstructorUsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) ConstructorUsingShadowDecl(C, EmptyShell());
}
CXXRecordDecl *ConstructorUsingShadowDecl::getNominatedBaseClass() const {
return getUsingDecl()->getQualifier()->getAsRecordDecl();
}
void UsingDecl::anchor() { }
void UsingDecl::addShadowDecl(UsingShadowDecl *S) {
assert(std::find(shadow_begin(), shadow_end(), S) == shadow_end() &&
"declaration already in set");
assert(S->getUsingDecl() == this);
if (FirstUsingShadow.getPointer())
S->UsingOrNextShadow = FirstUsingShadow.getPointer();
FirstUsingShadow.setPointer(S);
}
void UsingDecl::removeShadowDecl(UsingShadowDecl *S) {
assert(std::find(shadow_begin(), shadow_end(), S) != shadow_end() &&
"declaration not in set");
assert(S->getUsingDecl() == this);
// Remove S from the shadow decl chain. This is O(n) but hopefully rare.
if (FirstUsingShadow.getPointer() == S) {
FirstUsingShadow.setPointer(
dyn_cast<UsingShadowDecl>(S->UsingOrNextShadow));
S->UsingOrNextShadow = this;
return;
}
UsingShadowDecl *Prev = FirstUsingShadow.getPointer();
while (Prev->UsingOrNextShadow != S)
Prev = cast<UsingShadowDecl>(Prev->UsingOrNextShadow);
Prev->UsingOrNextShadow = S->UsingOrNextShadow;
S->UsingOrNextShadow = this;
}
UsingDecl *UsingDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation UL,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
bool HasTypename) {
return new (C, DC) UsingDecl(DC, UL, QualifierLoc, NameInfo, HasTypename);
}
UsingDecl *UsingDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UsingDecl(nullptr, SourceLocation(),
NestedNameSpecifierLoc(), DeclarationNameInfo(),
false);
}
SourceRange UsingDecl::getSourceRange() const {
SourceLocation Begin = isAccessDeclaration()
? getQualifierLoc().getBeginLoc() : UsingLocation;
return SourceRange(Begin, getNameInfo().getEndLoc());
}
void UsingPackDecl::anchor() { }
UsingPackDecl *UsingPackDecl::Create(ASTContext &C, DeclContext *DC,
NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> UsingDecls) {
size_t Extra = additionalSizeToAlloc<NamedDecl *>(UsingDecls.size());
return new (C, DC, Extra) UsingPackDecl(DC, InstantiatedFrom, UsingDecls);
}
UsingPackDecl *UsingPackDecl::CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpansions) {
size_t Extra = additionalSizeToAlloc<NamedDecl *>(NumExpansions);
auto *Result = new (C, ID, Extra) UsingPackDecl(nullptr, nullptr, None);
Result->NumExpansions = NumExpansions;
auto *Trail = Result->getTrailingObjects<NamedDecl *>();
for (unsigned I = 0; I != NumExpansions; ++I)
new (Trail + I) NamedDecl*(nullptr);
return Result;
}
void UnresolvedUsingValueDecl::anchor() { }
UnresolvedUsingValueDecl *
UnresolvedUsingValueDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
NestedNameSpecifierLoc QualifierLoc,
const DeclarationNameInfo &NameInfo,
SourceLocation EllipsisLoc) {
return new (C, DC) UnresolvedUsingValueDecl(DC, C.DependentTy, UsingLoc,
QualifierLoc, NameInfo,
EllipsisLoc);
}
UnresolvedUsingValueDecl *
UnresolvedUsingValueDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UnresolvedUsingValueDecl(nullptr, QualType(),
SourceLocation(),
NestedNameSpecifierLoc(),
DeclarationNameInfo(),
SourceLocation());
}
SourceRange UnresolvedUsingValueDecl::getSourceRange() const {
SourceLocation Begin = isAccessDeclaration()
? getQualifierLoc().getBeginLoc() : UsingLocation;
return SourceRange(Begin, getNameInfo().getEndLoc());
}
void UnresolvedUsingTypenameDecl::anchor() { }
UnresolvedUsingTypenameDecl *
UnresolvedUsingTypenameDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation UsingLoc,
SourceLocation TypenameLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TargetNameLoc,
DeclarationName TargetName,
SourceLocation EllipsisLoc) {
return new (C, DC) UnresolvedUsingTypenameDecl(
DC, UsingLoc, TypenameLoc, QualifierLoc, TargetNameLoc,
TargetName.getAsIdentifierInfo(), EllipsisLoc);
}
UnresolvedUsingTypenameDecl *
UnresolvedUsingTypenameDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) UnresolvedUsingTypenameDecl(
nullptr, SourceLocation(), SourceLocation(), NestedNameSpecifierLoc(),
SourceLocation(), nullptr, SourceLocation());
}
void StaticAssertDecl::anchor() { }
StaticAssertDecl *StaticAssertDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation StaticAssertLoc,
Expr *AssertExpr,
StringLiteral *Message,
SourceLocation RParenLoc,
bool Failed) {
return new (C, DC) StaticAssertDecl(DC, StaticAssertLoc, AssertExpr, Message,
RParenLoc, Failed);
}
StaticAssertDecl *StaticAssertDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) StaticAssertDecl(nullptr, SourceLocation(), nullptr,
nullptr, SourceLocation(), false);
}
void BindingDecl::anchor() {}
BindingDecl *BindingDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation IdLoc, IdentifierInfo *Id) {
return new (C, DC) BindingDecl(DC, IdLoc, Id);
}
BindingDecl *BindingDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) BindingDecl(nullptr, SourceLocation(), nullptr);
}
VarDecl *BindingDecl::getHoldingVar() const {
Expr *B = getBinding();
if (!B)
return nullptr;
auto *DRE = dyn_cast<DeclRefExpr>(B->IgnoreImplicit());
if (!DRE)
return nullptr;
auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
assert(VD->isImplicit() && "holding var for binding decl not implicit");
return VD;
}
void DecompositionDecl::anchor() {}
DecompositionDecl *DecompositionDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation StartLoc,
SourceLocation LSquareLoc,
QualType T, TypeSourceInfo *TInfo,
StorageClass SC,
ArrayRef<BindingDecl *> Bindings) {
size_t Extra = additionalSizeToAlloc<BindingDecl *>(Bindings.size());
return new (C, DC, Extra)
DecompositionDecl(C, DC, StartLoc, LSquareLoc, T, TInfo, SC, Bindings);
}
DecompositionDecl *DecompositionDecl::CreateDeserialized(ASTContext &C,
unsigned ID,
unsigned NumBindings) {
size_t Extra = additionalSizeToAlloc<BindingDecl *>(NumBindings);
auto *Result = new (C, ID, Extra)
DecompositionDecl(C, nullptr, SourceLocation(), SourceLocation(),
QualType(), nullptr, StorageClass(), None);
// Set up and clean out the bindings array.
Result->NumBindings = NumBindings;
auto *Trail = Result->getTrailingObjects<BindingDecl *>();
for (unsigned I = 0; I != NumBindings; ++I)
new (Trail + I) BindingDecl*(nullptr);
return Result;
}
void DecompositionDecl::printName(llvm::raw_ostream &os) const {
os << '[';
bool Comma = false;
for (auto *B : bindings()) {
if (Comma)
os << ", ";
B->printName(os);
Comma = true;
}
os << ']';
}
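// Illustrative: for 'auto [a, b] = p;' this prints "[a, b]".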
MSPropertyDecl *MSPropertyDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName N,
QualType T, TypeSourceInfo *TInfo,
SourceLocation StartL,
IdentifierInfo *Getter,
IdentifierInfo *Setter) {
return new (C, DC) MSPropertyDecl(DC, L, N, T, TInfo, StartL, Getter, Setter);
}
MSPropertyDecl *MSPropertyDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) MSPropertyDecl(nullptr, SourceLocation(),
DeclarationName(), QualType(), nullptr,
SourceLocation(), nullptr, nullptr);
}
static const char *getAccessName(AccessSpecifier AS) {
switch (AS) {
case AS_none:
llvm_unreachable("Invalid access specifier!");
case AS_public:
return "public";
case AS_private:
return "private";
case AS_protected:
return "protected";
}
llvm_unreachable("Invalid access specifier!");
}
const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
AccessSpecifier AS) {
return DB << getAccessName(AS);
}
const PartialDiagnostic &clang::operator<<(const PartialDiagnostic &DB,
AccessSpecifier AS) {
return DB << getAccessName(AS);
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.cpp (revision 322855)
@@ -1,333 +1,304 @@
//===----- CGCXXABI.cpp - Interface to C++ ABIs ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides an abstract class for C++ code generation. Concrete subclasses
// of this implement code generation for specific C++ ABIs.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
using namespace clang;
using namespace CodeGen;
CGCXXABI::~CGCXXABI() { }
void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) {
DiagnosticsEngine &Diags = CGF.CGM.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot yet compile %0 in this ABI");
Diags.Report(CGF.getContext().getFullLoc(CGF.CurCodeDecl->getLocation()),
DiagID)
<< S;
}
bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const {
- // If RD has a non-trivial move or copy constructor, we cannot copy the
- // argument.
- if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor())
- return false;
-
- // If RD has a non-trivial destructor, we cannot copy the argument.
- if (RD->hasNonTrivialDestructor())
- return false;
-
// We can only copy the argument if there exists at least one trivial,
// non-deleted copy or move constructor.
- // FIXME: This assumes that all lazily declared copy and move constructors are
- // not deleted. This assumption might not be true in some corner cases.
- bool CopyDeleted = false;
- bool MoveDeleted = false;
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor() || CD->isMoveConstructor()) {
- assert(CD->isTrivial());
- // We had at least one undeleted trivial copy or move ctor. Return
- // directly.
- if (!CD->isDeleted())
- return true;
- if (CD->isCopyConstructor())
- CopyDeleted = true;
- else
- MoveDeleted = true;
- }
- }
-
- // If all trivial copy and move constructors are deleted, we cannot copy the
- // argument.
- return !(CopyDeleted && MoveDeleted);
+ return RD->canPassInRegisters();
}
llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) {
return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T));
}
llvm::Type *
CGCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
return CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
}
CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer(
CodeGenFunction &CGF, const Expr *E, Address This,
llvm::Value *&ThisPtrForCall,
llvm::Value *MemPtr, const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "calls through member pointers");
ThisPtrForCall = This.getPointer();
const FunctionProtoType *FPT =
MPT->getPointeeType()->getAs<FunctionProtoType>();
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
llvm::Constant *FnPtr = llvm::Constant::getNullValue(FTy->getPointerTo());
return CGCallee::forDirect(FnPtr, FPT);
}
llvm::Value *
CGCXXABI::EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "loads of member pointers");
llvm::Type *Ty = CGF.ConvertType(MPT->getPointeeType())
->getPointerTo(Base.getAddressSpace());
return llvm::Constant::getNullValue(Ty);
}
llvm::Value *CGCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) {
ErrorUnsupportedABI(CGF, "member function pointer conversions");
return GetBogusMemberPointer(E->getType());
}
llvm::Constant *CGCXXABI::EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) {
return GetBogusMemberPointer(E->getType());
}
llvm::Value *
CGCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) {
ErrorUnsupportedABI(CGF, "member function pointer comparison");
return CGF.Builder.getFalse();
}
llvm::Value *
CGCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) {
ErrorUnsupportedABI(CGF, "member function pointer null testing");
return CGF.Builder.getFalse();
}
llvm::Constant *
CGCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
return GetBogusMemberPointer(QualType(MPT, 0));
}
llvm::Constant *CGCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
return GetBogusMemberPointer(CGM.getContext().getMemberPointerType(
MD->getType(), MD->getParent()->getTypeForDecl()));
}
llvm::Constant *CGCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
return GetBogusMemberPointer(QualType(MPT, 0));
}
llvm::Constant *CGCXXABI::EmitMemberPointer(const APValue &MP, QualType MPT) {
return GetBogusMemberPointer(MPT);
}
bool CGCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
// Fake answer.
return true;
}
void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
// FIXME: I'm not entirely sure I like using a fake decl just for code
// generation. Maybe we can come up with a better way?
auto *ThisDecl = ImplicitParamDecl::Create(
CGM.getContext(), nullptr, MD->getLocation(),
&CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()),
ImplicitParamDecl::CXXThis);
params.push_back(ThisDecl);
CGF.CXXABIThisDecl = ThisDecl;
// Compute the presumed alignment of 'this', which basically comes
// down to whether we know it's a complete object or not.
auto &Layout = CGF.getContext().getASTRecordLayout(MD->getParent());
if (MD->getParent()->getNumVBases() == 0 || // avoid vcall in common case
MD->getParent()->hasAttr<FinalAttr>() ||
!isThisCompleteObject(CGF.CurGD)) {
CGF.CXXABIThisAlignment = Layout.getAlignment();
} else {
CGF.CXXABIThisAlignment = Layout.getNonVirtualAlignment();
}
}
void CGCXXABI::EmitThisParam(CodeGenFunction &CGF) {
/// Initialize the 'this' slot.
assert(getThisDecl(CGF) && "no 'this' variable for function");
CGF.CXXABIThisValue
= CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getThisDecl(CGF)),
"this");
}
void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
RValue RV, QualType ResultType) {
CGF.EmitReturnOfRValue(RV, ResultType);
}
CharUnits CGCXXABI::GetArrayCookieSize(const CXXNewExpr *expr) {
if (!requiresArrayCookie(expr))
return CharUnits::Zero();
return getArrayCookieSizeImpl(expr->getAllocatedType());
}
CharUnits CGCXXABI::getArrayCookieSizeImpl(QualType elementType) {
// BOGUS
return CharUnits::Zero();
}
Address CGCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) {
// Should never be called.
ErrorUnsupportedABI(CGF, "array cookie initialization");
return Address::invalid();
}
bool CGCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
QualType elementType) {
// If the class's usual deallocation function takes two arguments,
// it needs a cookie.
if (expr->doesUsualArrayDeleteWantSize())
return true;
return elementType.isDestructedType();
}
bool CGCXXABI::requiresArrayCookie(const CXXNewExpr *expr) {
// If the class's usual deallocation function takes two arguments,
// it needs a cookie.
if (expr->doesUsualArrayDeleteWantSize())
return true;
return expr->getAllocatedType().isDestructedType();
}
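// Illustrative example (editorial sketch): for 'struct T { ~T(); };',
// 'new T[n]' needs a cookie recording n so that 'delete[] p' can run n
// destructors; 'new int[n]' typically needs none, since int is trivially
// destructible and the usual operator delete[] takes no size argument.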
void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr,
const CXXDeleteExpr *expr, QualType eltTy,
llvm::Value *&numElements,
llvm::Value *&allocPtr, CharUnits &cookieSize) {
// Derive a char* in the same address space as the pointer.
ptr = CGF.Builder.CreateElementBitCast(ptr, CGF.Int8Ty);
// If we don't need an array cookie, bail out early.
if (!requiresArrayCookie(expr, eltTy)) {
allocPtr = ptr.getPointer();
numElements = nullptr;
cookieSize = CharUnits::Zero();
return;
}
cookieSize = getArrayCookieSizeImpl(eltTy);
Address allocAddr =
CGF.Builder.CreateConstInBoundsByteGEP(ptr, -cookieSize);
allocPtr = allocAddr.getPointer();
numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize);
}
llvm::Value *CGCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address ptr,
CharUnits cookieSize) {
ErrorUnsupportedABI(CGF, "reading a new[] cookie");
return llvm::ConstantInt::get(CGF.SizeTy, 0);
}
/// Returns the adjustment, in bytes, required for the given
/// member-pointer operation. Returns null if no adjustment is
/// required.
llvm::Constant *CGCXXABI::getMemberPointerAdjustment(const CastExpr *E) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer);
QualType derivedType;
if (E->getCastKind() == CK_DerivedToBaseMemberPointer)
derivedType = E->getSubExpr()->getType();
else
derivedType = E->getType();
const CXXRecordDecl *derivedClass =
derivedType->castAs<MemberPointerType>()->getClass()->getAsCXXRecordDecl();
return CGM.GetNonVirtualBaseClassOffset(derivedClass,
E->path_begin(),
E->path_end());
}
CharUnits CGCXXABI::getMemberPointerPathAdjustment(const APValue &MP) {
// TODO: Store base specifiers in APValue member pointer paths so we can
// easily reuse CGM.GetNonVirtualBaseClassOffset().
const ValueDecl *MPD = MP.getMemberPointerDecl();
CharUnits ThisAdjustment = CharUnits::Zero();
ArrayRef<const CXXRecordDecl*> Path = MP.getMemberPointerPath();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(MPD->getDeclContext());
for (unsigned I = 0, N = Path.size(); I != N; ++I) {
const CXXRecordDecl *Base = RD;
const CXXRecordDecl *Derived = Path[I];
if (DerivedMember)
std::swap(Base, Derived);
ThisAdjustment +=
getContext().getASTRecordLayout(Derived).getBaseClassOffset(Base);
RD = Path[I];
}
if (DerivedMember)
ThisAdjustment = -ThisAdjustment;
return ThisAdjustment;
}
llvm::BasicBlock *
CGCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
if (CGM.getTarget().getCXXABI().hasConstructorVariants())
llvm_unreachable("shouldn't be called in this ABI");
ErrorUnsupportedABI(CGF, "complete object detection in ctor");
return nullptr;
}
bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
return false;
}
llvm::CallInst *
CGCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) {
// Just call std::terminate and ignore the violating exception.
return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
}
CatchTypeInfo CGCXXABI::getCatchAllTypeInfo() {
return CatchTypeInfo{nullptr, 0};
}
std::vector<CharUnits> CGCXXABI::getVBPtrOffsets(const CXXRecordDecl *RD) {
return std::vector<CharUnits>();
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp (revision 322855)
@@ -1,4007 +1,4002 @@
//===------- ItaniumCXXABI.cpp - Emit LLVM Code from ASTs for a Module ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides C++ code generation targeting the Itanium C++ ABI. The class
// in this file generates structures that follow the Itanium C++ ABI, which is
// documented at:
// http://www.codesourcery.com/public/cxx-abi/abi.html
// http://www.codesourcery.com/public/cxx-abi/abi-eh.html
//
// It also supports the closely-related ARM ABI, documented at:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0041c/IHI0041C_cppabi.pdf
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CGVTables.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/Type.h"
#include "clang/AST/StmtCXX.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
using namespace clang;
using namespace CodeGen;
namespace {
class ItaniumCXXABI : public CodeGen::CGCXXABI {
/// VTables - All the vtables which have been defined.
llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables;
protected:
bool UseARMMethodPtrABI;
bool UseARMGuardVarABI;
bool Use32BitVTableOffsetABI;
ItaniumMangleContext &getMangleContext() {
return cast<ItaniumMangleContext>(CodeGen::CGCXXABI::getMangleContext());
}
public:
ItaniumCXXABI(CodeGen::CodeGenModule &CGM,
bool UseARMMethodPtrABI = false,
bool UseARMGuardVarABI = false) :
CGCXXABI(CGM), UseARMMethodPtrABI(UseARMMethodPtrABI),
UseARMGuardVarABI(UseARMGuardVarABI),
Use32BitVTableOffsetABI(false) { }
bool classifyReturnType(CGFunctionInfo &FI) const override;
RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
- // Structures with either a non-trivial destructor or a non-trivial
- // copy constructor are always indirect.
- // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
- // special members.
- if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
+ // If C++ prohibits us from making a copy, pass by address.
+ if (!canCopyArgument(RD))
return RAA_Indirect;
return RAA_Default;
}
bool isThisCompleteObject(GlobalDecl GD) const override {
// The Itanium ABI has separate complete-object vs. base-object
// variants of both constructors and destructors.
if (isa<CXXDestructorDecl>(GD.getDecl())) {
switch (GD.getDtorType()) {
case Dtor_Complete:
case Dtor_Deleting:
return true;
case Dtor_Base:
return false;
case Dtor_Comdat:
llvm_unreachable("emitting dtor comdat as function?");
}
llvm_unreachable("bad dtor kind");
}
if (isa<CXXConstructorDecl>(GD.getDecl())) {
switch (GD.getCtorType()) {
case Ctor_Complete:
return true;
case Ctor_Base:
return false;
case Ctor_CopyingClosure:
case Ctor_DefaultClosure:
llvm_unreachable("closure ctors in Itanium ABI?");
case Ctor_Comdat:
llvm_unreachable("emitting ctor comdat as function?");
}
llvm_unreachable("bad dtor kind");
}
// No other kinds.
return false;
}
bool isZeroInitializable(const MemberPointerType *MPT) override;
llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
CGCallee
EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF,
const Expr *E,
Address This,
llvm::Value *&ThisPtrForCall,
llvm::Value *MemFnPtr,
const MemberPointerType *MPT) override;
llvm::Value *
EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
Address Base,
llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) override;
llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) override;
llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD) override;
llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) override;
llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
llvm::Constant *BuildMemberPointer(const CXXMethodDecl *MD,
CharUnits ThisAdjustment);
llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L, llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) override;
llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *Addr,
const MemberPointerType *MPT) override;
void emitVirtualObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE,
Address Ptr, QualType ElementType,
const CXXDestructorDecl *Dtor) override;
CharUnits getAlignmentOfExnObject() {
unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment();
return CGM.getContext().toCharUnitsFromBits(Align);
}
void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override;
void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
llvm::CallInst *
emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) override;
void EmitFundamentalRTTIDescriptor(QualType Type, bool DLLExport);
void EmitFundamentalRTTIDescriptors(bool DLLExport);
llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
CatchTypeInfo
getAddrOfCXXCatchHandlerType(QualType Ty,
QualType CatchHandlerType) override {
return CatchTypeInfo{getAddrOfRTTIDescriptor(Ty), 0};
}
bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
void EmitBadTypeidCall(CodeGenFunction &CGF) override;
llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) override;
bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) override;
llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
llvm::BasicBlock *CastEnd) override;
llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) override;
bool EmitBadCastCall(CodeGenFunction &CGF) override;
llvm::Value *
GetVirtualBaseClassOffset(CodeGenFunction &CGF, Address This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) override;
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
AddedStructorArgs
buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) override;
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const override {
// Itanium does not emit any destructor variant as an inline thunk.
// Delegating may occur as an optimization, but all variants are emitted
// either with external linkage or as linkonce if they are inline and used.
return false;
}
void EmitCXXDestructors(const CXXDestructorDecl *D) override;
void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
FunctionArgList &Params) override;
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
AddedStructorArgs
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) override;
void emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) override;
bool isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF,
CodeGenFunction::VPtr Vptr) override;
bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override {
return true;
}
llvm::Constant *
getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::Value *getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase) override;
llvm::Value *getVTableAddressPointInStructorWithVTT(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase);
llvm::Constant *
getVTableAddressPointForConstExpr(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) override;
CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
Address This, llvm::Type *Ty,
SourceLocation Loc) override;
llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType,
Address This,
const CXXMemberCallExpr *CE) override;
void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override;
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD,
bool ReturnAdjustment) override {
// Allow inlining of thunks by emitting them with available_externally
// linkage together with vtables when needed.
if (ForVTable && !Thunk->hasLocalLinkage())
Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
}
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
const ThisAdjustment &TA) override;
llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) override;
size_t getSrcArgforCopyCtor(const CXXConstructorDecl *,
FunctionArgList &Args) const override {
assert(!Args.empty() && "expected the arglist to not be empty!");
return Args.size() - 1;
}
StringRef GetPureVirtualCallName() override { return "__cxa_pure_virtual"; }
StringRef GetDeletedVirtualCallName() override
{ return "__cxa_deleted_virtual"; }
CharUnits getArrayCookieSizeImpl(QualType elementType) override;
Address InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) override;
void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *DeclPtr,
bool PerformInit) override;
void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *dtor, llvm::Constant *addr) override;
llvm::Function *getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::Value *Val);
void EmitThreadLocalInitFuncs(
CodeGenModule &CGM,
ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
bool usesThreadWrapperFunction() const override { return true; }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
bool NeedsVTTParameter(GlobalDecl GD) override;
/**************************** RTTI Uniqueness ******************************/
protected:
/// Returns true if the ABI requires RTTI type_info objects to be unique
/// across a program.
virtual bool shouldRTTIBeUnique() const { return true; }
public:
/// What sort of unique-RTTI behavior should we use?
enum RTTIUniquenessKind {
/// We are guaranteeing, or need to guarantee, that the RTTI string
/// is unique.
RUK_Unique,
/// We are not guaranteeing uniqueness for the RTTI string, so we
/// can demote to hidden visibility but must use string comparisons.
RUK_NonUniqueHidden,
/// We are not guaranteeing uniqueness for the RTTI string, so we
/// have to use string comparisons, but we also have to emit it with
/// non-hidden visibility.
RUK_NonUniqueVisible
};
/// Return the required visibility status for the given type and linkage in
/// the current ABI.
RTTIUniquenessKind
classifyRTTIUniqueness(QualType CanTy,
llvm::GlobalValue::LinkageTypes Linkage) const;
friend class ItaniumRTTIBuilder;
void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
private:
bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const {
const auto &VtableLayout =
CGM.getItaniumVTableContext().getVTableLayout(RD);
for (const auto &VtableComponent : VtableLayout.vtable_components()) {
// Skip empty slot.
if (!VtableComponent.isUsedFunctionPointerKind())
continue;
const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
if (!Method->getCanonicalDecl()->isInlined())
continue;
StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl());
auto *Entry = CGM.GetGlobalValue(Name);
// This checks whether a virtual inline function has already been emitted.
// Note that it is possible that this inline function would be emitted
// after trying to emit vtable speculatively. Because of this we do
// an extra pass after emitting all deferred vtables to find and emit
// these vtables opportunistically.
if (!Entry || Entry->isDeclaration())
return true;
}
return false;
}
bool isVTableHidden(const CXXRecordDecl *RD) const {
const auto &VtableLayout =
CGM.getItaniumVTableContext().getVTableLayout(RD);
for (const auto &VtableComponent : VtableLayout.vtable_components()) {
if (VtableComponent.isRTTIKind()) {
const CXXRecordDecl *RTTIDecl = VtableComponent.getRTTIDecl();
if (RTTIDecl->getVisibility() == Visibility::HiddenVisibility)
return true;
} else if (VtableComponent.isUsedFunctionPointerKind()) {
const CXXMethodDecl *Method = VtableComponent.getFunctionDecl();
if (Method->getVisibility() == Visibility::HiddenVisibility &&
!Method->isDefined())
return true;
}
}
return false;
}
};
class ARMCXXABI : public ItaniumCXXABI {
public:
ARMCXXABI(CodeGen::CodeGenModule &CGM) :
ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ true) {}
bool HasThisReturn(GlobalDecl GD) const override {
return (isa<CXXConstructorDecl>(GD.getDecl()) || (
isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() != Dtor_Deleting));
}
void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV,
QualType ResTy) override;
CharUnits getArrayCookieSizeImpl(QualType elementType) override;
Address InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, Address allocPtr,
CharUnits cookieSize) override;
};
class iOS64CXXABI : public ARMCXXABI {
public:
iOS64CXXABI(CodeGen::CodeGenModule &CGM) : ARMCXXABI(CGM) {
Use32BitVTableOffsetABI = true;
}
// ARM64 libraries are prepared for non-unique RTTI.
bool shouldRTTIBeUnique() const override { return false; }
};
class WebAssemblyCXXABI final : public ItaniumCXXABI {
public:
explicit WebAssemblyCXXABI(CodeGen::CodeGenModule &CGM)
: ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true,
/*UseARMGuardVarABI=*/true) {}
private:
bool HasThisReturn(GlobalDecl GD) const override {
return isa<CXXConstructorDecl>(GD.getDecl()) ||
(isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() != Dtor_Deleting);
}
bool canCallMismatchedFunctionType() const override { return false; }
};
}
CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
switch (CGM.getTarget().getCXXABI().getKind()) {
// For IR-generation purposes, there's no significant difference
// between the ARM and iOS ABIs.
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
case TargetCXXABI::WatchOS:
return new ARMCXXABI(CGM);
case TargetCXXABI::iOS64:
return new iOS64CXXABI(CGM);
// Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't
// include the other 32-bit ARM oddities: constructor/destructor return values
// and array cookies.
case TargetCXXABI::GenericAArch64:
return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ true);
case TargetCXXABI::GenericMIPS:
return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true);
case TargetCXXABI::WebAssembly:
return new WebAssemblyCXXABI(CGM);
case TargetCXXABI::GenericItanium:
if (CGM.getContext().getTargetInfo().getTriple().getArch()
== llvm::Triple::le32) {
// For PNaCl, use ARM-style method pointers so that PNaCl code
// does not assume anything about the alignment of function
// pointers.
return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
/* UseARMGuardVarABI = */ false);
}
return new ItaniumCXXABI(CGM);
case TargetCXXABI::Microsoft:
llvm_unreachable("Microsoft ABI is not Itanium-based");
}
llvm_unreachable("bad ABI kind");
}
llvm::Type *
ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
if (MPT->isMemberDataPointer())
return CGM.PtrDiffTy;
return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy);
}
/// In the Itanium and ARM ABIs, method pointers have the form:
/// struct { ptrdiff_t ptr; ptrdiff_t adj; } memptr;
///
/// In the Itanium ABI:
/// - method pointers are virtual if (memptr.ptr & 1) is nonzero
/// - the this-adjustment is (memptr.adj)
/// - the virtual offset is (memptr.ptr - 1)
///
/// In the ARM ABI:
/// - method pointers are virtual if (memptr.adj & 1) is nonzero
/// - the this-adjustment is (memptr.adj >> 1)
/// - the virtual offset is (memptr.ptr)
/// ARM uses 'adj' for the virtual flag because Thumb functions
/// may be only single-byte aligned.
///
/// If the member is virtual, the adjusted 'this' pointer points
/// to a vtable pointer from which the virtual offset is applied.
///
/// If the member is non-virtual, memptr.ptr is the address of
/// the function to call.
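// Worked example (editorial sketch; assumes 8-byte pointers and a zero
// this-adjustment):
//   struct S { virtual void f(); void g(); };
//   auto pf = &S::f; // virtual, at vtable byte offset 0
//   auto pg = &S::g; // non-virtual
// Itanium: pf = { ptr = 0 + 1 = 1, adj = 0 }
//          pg = { ptr = (ptrdiff_t)address-of-S::g, adj = 0 }
// ARM:     pf = { ptr = 0, adj = 2*0 + 1 = 1 }
//          pg = { ptr = (ptrdiff_t)address-of-S::g, adj = 2*0 = 0 }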
CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
CodeGenFunction &CGF, const Expr *E, Address ThisAddr,
llvm::Value *&ThisPtrForCall,
llvm::Value *MemFnPtr, const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
const FunctionProtoType *FPT =
MPT->getPointeeType()->getAs<FunctionProtoType>();
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl());
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
llvm::Constant *ptrdiff_1 = llvm::ConstantInt::get(CGM.PtrDiffTy, 1);
llvm::BasicBlock *FnVirtual = CGF.createBasicBlock("memptr.virtual");
llvm::BasicBlock *FnNonVirtual = CGF.createBasicBlock("memptr.nonvirtual");
llvm::BasicBlock *FnEnd = CGF.createBasicBlock("memptr.end");
// Extract memptr.adj, which is in the second field.
llvm::Value *RawAdj = Builder.CreateExtractValue(MemFnPtr, 1, "memptr.adj");
// Compute the true adjustment.
llvm::Value *Adj = RawAdj;
if (UseARMMethodPtrABI)
Adj = Builder.CreateAShr(Adj, ptrdiff_1, "memptr.adj.shifted");
// Apply the adjustment and cast back to the original struct type
// for consistency.
llvm::Value *This = ThisAddr.getPointer();
llvm::Value *Ptr = Builder.CreateBitCast(This, Builder.getInt8PtrTy());
Ptr = Builder.CreateInBoundsGEP(Ptr, Adj);
This = Builder.CreateBitCast(Ptr, This->getType(), "this.adjusted");
ThisPtrForCall = This;
// Load the function pointer.
llvm::Value *FnAsInt = Builder.CreateExtractValue(MemFnPtr, 0, "memptr.ptr");
// If the LSB in the function pointer is 1, the function pointer points to
// a virtual function.
llvm::Value *IsVirtual;
if (UseARMMethodPtrABI)
IsVirtual = Builder.CreateAnd(RawAdj, ptrdiff_1);
else
IsVirtual = Builder.CreateAnd(FnAsInt, ptrdiff_1);
IsVirtual = Builder.CreateIsNotNull(IsVirtual, "memptr.isvirtual");
Builder.CreateCondBr(IsVirtual, FnVirtual, FnNonVirtual);
// In the virtual path, the adjustment left 'This' pointing to the
// vtable of the correct base subobject. The "function pointer" is an
// offset within the vtable (+1 for the virtual flag on non-ARM).
CGF.EmitBlock(FnVirtual);
// Cast the adjusted this to a pointer to vtable pointer and load.
llvm::Type *VTableTy = Builder.getInt8PtrTy();
CharUnits VTablePtrAlign =
CGF.CGM.getDynamicOffsetAlignment(ThisAddr.getAlignment(), RD,
CGF.getPointerAlign());
llvm::Value *VTable =
CGF.GetVTablePtr(Address(This, VTablePtrAlign), VTableTy, RD);
// Apply the offset.
// On ARM64, to reserve extra space in virtual member function pointers,
// we only pay attention to the low 32 bits of the offset.
llvm::Value *VTableOffset = FnAsInt;
if (!UseARMMethodPtrABI)
VTableOffset = Builder.CreateSub(VTableOffset, ptrdiff_1);
if (Use32BitVTableOffsetABI) {
VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty);
VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy);
}
VTable = Builder.CreateGEP(VTable, VTableOffset);
// Load the virtual function to call.
VTable = Builder.CreateBitCast(VTable, FTy->getPointerTo()->getPointerTo());
llvm::Value *VirtualFn =
Builder.CreateAlignedLoad(VTable, CGF.getPointerAlign(),
"memptr.virtualfn");
CGF.EmitBranch(FnEnd);
// In the non-virtual path, the 'ptr' field already holds the address
// of the function to call.
CGF.EmitBlock(FnNonVirtual);
llvm::Value *NonVirtualFn =
Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn");
// We're done.
CGF.EmitBlock(FnEnd);
llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2);
CalleePtr->addIncoming(VirtualFn, FnVirtual);
CalleePtr->addIncoming(NonVirtualFn, FnNonVirtual);
CGCallee Callee(FPT, CalleePtr);
return Callee;
}
/// Compute an l-value by applying the given pointer-to-member to a
/// base object.
llvm::Value *ItaniumCXXABI::EmitMemberDataPointerAddress(
CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MemPtr->getType() == CGM.PtrDiffTy);
CGBuilderTy &Builder = CGF.Builder;
// Cast to char*.
Base = Builder.CreateElementBitCast(Base, CGF.Int8Ty);
// Apply the offset, which we assume is non-null.
llvm::Value *Addr =
Builder.CreateInBoundsGEP(Base.getPointer(), MemPtr, "memptr.offset");
// Cast the address to the appropriate pointer type, adopting the
// address space of the base pointer.
llvm::Type *PType = CGF.ConvertTypeForMem(MPT->getPointeeType())
->getPointerTo(Base.getAddressSpace());
return Builder.CreateBitCast(Addr, PType);
}
/// Perform a bitcast, derived-to-base, or base-to-derived member pointer
/// conversion.
///
/// Bitcast conversions are always a no-op under Itanium.
///
/// Obligatory offset/adjustment diagram:
/// <-- offset --> <-- adjustment -->
/// |--------------------------|----------------------|--------------------|
/// ^Derived address point ^Base address point ^Member address point
///
/// So when converting a base member pointer to a derived member pointer,
/// we add the offset to the adjustment because the address point has
/// decreased; and conversely, when converting a derived MP to a base MP
/// we subtract the offset from the adjustment because the address point
/// has increased.
///
/// The standard forbids (at compile time) conversion to and from
/// virtual bases, which is why we don't have to consider them here.
///
/// The standard forbids (at run time) casting a derived MP to a base
/// MP when the derived MP does not point to a member of the base.
/// This is why -1 is a reasonable choice for null data member
/// pointers.
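// Worked example (editorial sketch; assumes 4-byte int and a typical
// Itanium layout placing B at byte offset 4 inside D):
//   struct A { int a; }; struct B { int b; };
//   struct D : A, B {};
// Converting the 'int B::*' value 0 (i.e. &B::b) to 'int D::*' adds the
// base offset: 0 + 4 = 4, so &D::b is 4 bytes into D. The null value -1
// is preserved unchanged by the null check in the code below.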
llvm::Value *
ItaniumCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *src) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer ||
E->getCastKind() == CK_ReinterpretMemberPointer);
// Under Itanium, reinterprets don't require any additional processing.
if (E->getCastKind() == CK_ReinterpretMemberPointer) return src;
// Use constant emission if we can.
if (isa<llvm::Constant>(src))
return EmitMemberPointerConversion(E, cast<llvm::Constant>(src));
llvm::Constant *adj = getMemberPointerAdjustment(E);
if (!adj) return src;
CGBuilderTy &Builder = CGF.Builder;
bool isDerivedToBase = (E->getCastKind() == CK_DerivedToBaseMemberPointer);
const MemberPointerType *destTy =
E->getType()->castAs<MemberPointerType>();
// For member data pointers, this is just a matter of adding the
// offset if the source is non-null.
if (destTy->isMemberDataPointer()) {
llvm::Value *dst;
if (isDerivedToBase)
dst = Builder.CreateNSWSub(src, adj, "adj");
else
dst = Builder.CreateNSWAdd(src, adj, "adj");
// Null check.
llvm::Value *null = llvm::Constant::getAllOnesValue(src->getType());
llvm::Value *isNull = Builder.CreateICmpEQ(src, null, "memptr.isnull");
return Builder.CreateSelect(isNull, src, dst);
}
// The this-adjustment is left-shifted by 1 on ARM.
if (UseARMMethodPtrABI) {
uint64_t offset = cast<llvm::ConstantInt>(adj)->getZExtValue();
offset <<= 1;
adj = llvm::ConstantInt::get(adj->getType(), offset);
}
llvm::Value *srcAdj = Builder.CreateExtractValue(src, 1, "src.adj");
llvm::Value *dstAdj;
if (isDerivedToBase)
dstAdj = Builder.CreateNSWSub(srcAdj, adj, "adj");
else
dstAdj = Builder.CreateNSWAdd(srcAdj, adj, "adj");
return Builder.CreateInsertValue(src, dstAdj, 1);
}
llvm::Constant *
ItaniumCXXABI::EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *src) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer ||
E->getCastKind() == CK_ReinterpretMemberPointer);
// Under Itanium, reinterprets don't require any additional processing.
if (E->getCastKind() == CK_ReinterpretMemberPointer) return src;
// If the adjustment is trivial, we don't need to do anything.
llvm::Constant *adj = getMemberPointerAdjustment(E);
if (!adj) return src;
bool isDerivedToBase = (E->getCastKind() == CK_DerivedToBaseMemberPointer);
const MemberPointerType *destTy =
E->getType()->castAs<MemberPointerType>();
// For member data pointers, this is just a matter of adding the
// offset if the source is non-null.
if (destTy->isMemberDataPointer()) {
// null maps to null.
if (src->isAllOnesValue()) return src;
if (isDerivedToBase)
return llvm::ConstantExpr::getNSWSub(src, adj);
else
return llvm::ConstantExpr::getNSWAdd(src, adj);
}
// The this-adjustment is left-shifted by 1 on ARM.
if (UseARMMethodPtrABI) {
uint64_t offset = cast<llvm::ConstantInt>(adj)->getZExtValue();
offset <<= 1;
adj = llvm::ConstantInt::get(adj->getType(), offset);
}
llvm::Constant *srcAdj = llvm::ConstantExpr::getExtractValue(src, 1);
llvm::Constant *dstAdj;
if (isDerivedToBase)
dstAdj = llvm::ConstantExpr::getNSWSub(srcAdj, adj);
else
dstAdj = llvm::ConstantExpr::getNSWAdd(srcAdj, adj);
return llvm::ConstantExpr::getInsertValue(src, dstAdj, 1);
}
llvm::Constant *
ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
// Itanium C++ ABI 2.3:
// A NULL pointer is represented as -1.
if (MPT->isMemberDataPointer())
return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true);
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0);
llvm::Constant *Values[2] = { Zero, Zero };
return llvm::ConstantStruct::getAnon(Values);
}
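// Illustrative example (editorial sketch): 'int S::*p = nullptr;' lowers
// to the ptrdiff_t constant -1, leaving 0 free to mean "member at offset
// zero"; a null member *function* pointer is the pair { ptr = 0, adj = 0 }.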
llvm::Constant *
ItaniumCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
// Itanium C++ ABI 2.3:
// A pointer to data member is an offset from the base address of
// the class object containing it, represented as a ptrdiff_t
return llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity());
}
llvm::Constant *
ItaniumCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
return BuildMemberPointer(MD, CharUnits::Zero());
}
llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD,
CharUnits ThisAdjustment) {
assert(MD->isInstance() && "Member function must not be static!");
MD = MD->getCanonicalDecl();
CodeGenTypes &Types = CGM.getTypes();
// Get the function pointer (or index if this is a virtual function).
llvm::Constant *MemPtr[2];
if (MD->isVirtual()) {
uint64_t Index = CGM.getItaniumVTableContext().getMethodVTableIndex(MD);
const ASTContext &Context = getContext();
CharUnits PointerWidth =
Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
uint64_t VTableOffset = (Index * PointerWidth.getQuantity());
if (UseARMMethodPtrABI) {
// ARM C++ ABI 3.2.1:
// This ABI specifies that adj contains twice the this
// adjustment, plus 1 if the member function is virtual. The
// least significant bit of adj then makes exactly the same
// discrimination as the least significant bit of ptr does for
// Itanium.
MemPtr[0] = llvm::ConstantInt::get(CGM.PtrDiffTy, VTableOffset);
MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy,
2 * ThisAdjustment.getQuantity() + 1);
} else {
// Itanium C++ ABI 2.3:
// For a virtual function, [the pointer field] is 1 plus the
// virtual table offset (in bytes) of the function,
// represented as a ptrdiff_t.
MemPtr[0] = llvm::ConstantInt::get(CGM.PtrDiffTy, VTableOffset + 1);
MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy,
ThisAdjustment.getQuantity());
}
} else {
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
llvm::Type *Ty;
// Check whether the function has a computable LLVM signature.
if (Types.isFuncTypeConvertible(FPT)) {
// The function has a computable LLVM signature; use the correct type.
Ty = Types.GetFunctionType(Types.arrangeCXXMethodDeclaration(MD));
} else {
// Use an arbitrary non-function type to tell GetAddrOfFunction that the
// function type is incomplete.
Ty = CGM.PtrDiffTy;
}
llvm::Constant *addr = CGM.GetAddrOfFunction(MD, Ty);
MemPtr[0] = llvm::ConstantExpr::getPtrToInt(addr, CGM.PtrDiffTy);
MemPtr[1] = llvm::ConstantInt::get(CGM.PtrDiffTy,
(UseARMMethodPtrABI ? 2 : 1) *
ThisAdjustment.getQuantity());
}
return llvm::ConstantStruct::getAnon(MemPtr);
}
llvm::Constant *ItaniumCXXABI::EmitMemberPointer(const APValue &MP,
QualType MPType) {
const MemberPointerType *MPT = MPType->castAs<MemberPointerType>();
const ValueDecl *MPD = MP.getMemberPointerDecl();
if (!MPD)
return EmitNullMemberPointer(MPT);
CharUnits ThisAdjustment = getMemberPointerPathAdjustment(MP);
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD))
return BuildMemberPointer(MD, ThisAdjustment);
CharUnits FieldOffset =
getContext().toCharUnitsFromBits(getContext().getFieldOffset(MPD));
return EmitMemberDataPointer(MPT, ThisAdjustment + FieldOffset);
}
/// The comparison algorithm is pretty easy: the member pointers are
/// the same if they're either bitwise identical *or* both null.
///
/// ARM is different here only because null-ness is more complicated.
llvm::Value *
ItaniumCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) {
CGBuilderTy &Builder = CGF.Builder;
llvm::ICmpInst::Predicate Eq;
llvm::Instruction::BinaryOps And, Or;
if (Inequality) {
Eq = llvm::ICmpInst::ICMP_NE;
And = llvm::Instruction::Or;
Or = llvm::Instruction::And;
} else {
Eq = llvm::ICmpInst::ICMP_EQ;
And = llvm::Instruction::And;
Or = llvm::Instruction::Or;
}
// Member data pointers are easy because there's a unique null
// value, so it just comes down to bitwise equality.
if (MPT->isMemberDataPointer())
return Builder.CreateICmp(Eq, L, R);
// For member function pointers, the tautologies are more complex.
// The Itanium tautology is:
// (L == R) <==> (L.ptr == R.ptr && (L.ptr == 0 || L.adj == R.adj))
// The ARM tautology is:
// (L == R) <==> (L.ptr == R.ptr &&
// (L.adj == R.adj ||
// (L.ptr == 0 && ((L.adj|R.adj) & 1) == 0)))
// The inequality tautologies have exactly the same structure, except
// applying De Morgan's laws.
llvm::Value *LPtr = Builder.CreateExtractValue(L, 0, "lhs.memptr.ptr");
llvm::Value *RPtr = Builder.CreateExtractValue(R, 0, "rhs.memptr.ptr");
// This condition tests whether L.ptr == R.ptr. This must always be
// true for equality to hold.
llvm::Value *PtrEq = Builder.CreateICmp(Eq, LPtr, RPtr, "cmp.ptr");
// This condition, together with the assumption that L.ptr == R.ptr,
// tests whether the pointers are both null. ARM imposes an extra
// condition.
llvm::Value *Zero = llvm::Constant::getNullValue(LPtr->getType());
llvm::Value *EqZero = Builder.CreateICmp(Eq, LPtr, Zero, "cmp.ptr.null");
// This condition tests whether L.adj == R.adj. If this isn't
// true, the pointers are unequal unless they're both null.
llvm::Value *LAdj = Builder.CreateExtractValue(L, 1, "lhs.memptr.adj");
llvm::Value *RAdj = Builder.CreateExtractValue(R, 1, "rhs.memptr.adj");
llvm::Value *AdjEq = Builder.CreateICmp(Eq, LAdj, RAdj, "cmp.adj");
// Null member function pointers on ARM clear the low bit of Adj,
// so the zero condition has to check that neither low bit is set.
if (UseARMMethodPtrABI) {
llvm::Value *One = llvm::ConstantInt::get(LPtr->getType(), 1);
// Compute (l.adj | r.adj) & 1 and test it against zero.
llvm::Value *OrAdj = Builder.CreateOr(LAdj, RAdj, "or.adj");
llvm::Value *OrAdjAnd1 = Builder.CreateAnd(OrAdj, One);
llvm::Value *OrAdjAnd1EqZero = Builder.CreateICmp(Eq, OrAdjAnd1, Zero,
"cmp.or.adj");
EqZero = Builder.CreateBinOp(And, EqZero, OrAdjAnd1EqZero);
}
// Tie together all our conditions.
llvm::Value *Result = Builder.CreateBinOp(Or, EqZero, AdjEq);
Result = Builder.CreateBinOp(And, PtrEq, Result,
Inequality ? "memptr.ne" : "memptr.eq");
return Result;
}
llvm::Value *
ItaniumCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
/// For member data pointers, this is just a check against -1.
if (MPT->isMemberDataPointer()) {
assert(MemPtr->getType() == CGM.PtrDiffTy);
llvm::Value *NegativeOne =
llvm::Constant::getAllOnesValue(MemPtr->getType());
return Builder.CreateICmpNE(MemPtr, NegativeOne, "memptr.tobool");
}
// In Itanium, a member function pointer is not null if 'ptr' is not null.
llvm::Value *Ptr = Builder.CreateExtractValue(MemPtr, 0, "memptr.ptr");
llvm::Constant *Zero = llvm::ConstantInt::get(Ptr->getType(), 0);
llvm::Value *Result = Builder.CreateICmpNE(Ptr, Zero, "memptr.tobool");
// On ARM, a member function pointer is also non-null if the low bit of 'adj'
// (the virtual bit) is set.
if (UseARMMethodPtrABI) {
llvm::Constant *One = llvm::ConstantInt::get(Ptr->getType(), 1);
llvm::Value *Adj = Builder.CreateExtractValue(MemPtr, 1, "memptr.adj");
llvm::Value *VirtualBit = Builder.CreateAnd(Adj, One, "memptr.virtualbit");
llvm::Value *IsVirtual = Builder.CreateICmpNE(VirtualBit, Zero,
"memptr.isvirtual");
Result = Builder.CreateOr(Result, IsVirtual);
}
return Result;
}
bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
if (!RD)
return false;
- // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor.
- // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
- // special members.
- if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) {
+ // If C++ prohibits us from making a copy, return by address.
+ if (!canCopyArgument(RD)) {
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
return true;
}
return false;
}
/// The Itanium ABI requires non-zero initialization only for data
/// member pointers, for which '0' is a valid offset.
bool ItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
return MPT->isMemberFunctionPointer();
}
/// The Itanium ABI always places an offset to the complete object
/// at entry -2 in the vtable.
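// Vtable prefix layout (editorial sketch, in pointer-sized slots):
//   [-2] offset-to-top   [-1] RTTI pointer   [0...] virtual functions
// Loading slot -2 through the vptr and adding it to 'this' recovers the
// complete-object pointer that must be passed to operator delete.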
void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
const CXXDeleteExpr *DE,
Address Ptr,
QualType ElementType,
const CXXDestructorDecl *Dtor) {
bool UseGlobalDelete = DE->isGlobalDelete();
if (UseGlobalDelete) {
// Derive the complete-object pointer, which is what we need
// to pass to the deallocation function.
// Grab the vtable pointer as an intptr_t*.
auto *ClassDecl =
cast<CXXRecordDecl>(ElementType->getAs<RecordType>()->getDecl());
llvm::Value *VTable =
CGF.GetVTablePtr(Ptr, CGF.IntPtrTy->getPointerTo(), ClassDecl);
// Track back to entry -2 and pull out the offset there.
llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64(
VTable, -2, "complete-offset.ptr");
llvm::Value *Offset =
CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign());
// Apply the offset.
llvm::Value *CompletePtr =
CGF.Builder.CreateBitCast(Ptr.getPointer(), CGF.Int8PtrTy);
CompletePtr = CGF.Builder.CreateInBoundsGEP(CompletePtr, Offset);
// If we're supposed to call the global delete, make sure we do so
// even if the destructor throws.
CGF.pushCallObjectDeleteCleanup(DE->getOperatorDelete(), CompletePtr,
ElementType);
}
// FIXME: Provide a source location here even though there's no
// CXXMemberCallExpr for dtor call.
CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
if (UseGlobalDelete)
CGF.PopCleanupBlock();
}
void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
// void __cxa_rethrow();
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
llvm::Constant *Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow");
if (isNoReturn)
CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None);
else
CGF.EmitRuntimeCallOrInvoke(Fn);
}
static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) {
// void *__cxa_allocate_exception(size_t thrown_size);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception");
}
static llvm::Constant *getThrowFn(CodeGenModule &CGM) {
// void __cxa_throw(void *thrown_exception, std::type_info *tinfo,
// void (*dest) (void *));
llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy };
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_throw");
}
void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
QualType ThrowType = E->getSubExpr()->getType();
// Now allocate the exception object.
llvm::Type *SizeTy = CGF.ConvertType(getContext().getSizeType());
uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity();
llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM);
llvm::CallInst *ExceptionPtr = CGF.EmitNounwindRuntimeCall(
AllocExceptionFn, llvm::ConstantInt::get(SizeTy, TypeSize), "exception");
CharUnits ExnAlign = getAlignmentOfExnObject();
CGF.EmitAnyExprToExn(E->getSubExpr(), Address(ExceptionPtr, ExnAlign));
// Now throw the exception.
llvm::Constant *TypeInfo = CGM.GetAddrOfRTTIDescriptor(ThrowType,
/*ForEH=*/true);
// The address of the destructor. If the exception type has a
// trivial destructor (or isn't a record), we just pass null.
llvm::Constant *Dtor = nullptr;
if (const RecordType *RecordTy = ThrowType->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl());
if (!Record->hasTrivialDestructor()) {
CXXDestructorDecl *DtorD = Record->getDestructor();
Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete);
Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy);
}
}
if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
llvm::Value *args[] = { ExceptionPtr, TypeInfo, Dtor };
CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args);
}
static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) {
// void *__dynamic_cast(const void *sub,
// const abi::__class_type_info *src,
// const abi::__class_type_info *dst,
// std::ptrdiff_t src2dst_offset);
llvm::Type *Int8PtrTy = CGF.Int8PtrTy;
llvm::Type *PtrDiffTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy };
llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false);
// Mark the function as nounwind readonly.
llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind,
llvm::Attribute::ReadOnly };
llvm::AttributeList Attrs = llvm::AttributeList::get(
CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs);
return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs);
}
static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) {
// void __cxa_bad_cast();
llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false);
return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast");
}
/// \brief Compute the src2dst_offset hint as described in the
/// Itanium C++ ABI [2.9.7]
static CharUnits computeOffsetHint(ASTContext &Context,
const CXXRecordDecl *Src,
const CXXRecordDecl *Dst) {
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
// If Dst is not derived from Src we can skip the whole computation below and
// return that Src is not a public base of Dst. Record all inheritance paths.
if (!Dst->isDerivedFrom(Src, Paths))
return CharUnits::fromQuantity(-2ULL);
unsigned NumPublicPaths = 0;
CharUnits Offset;
// Now walk all possible inheritance paths.
for (const CXXBasePath &Path : Paths) {
if (Path.Access != AS_public) // Ignore non-public inheritance.
continue;
++NumPublicPaths;
for (const CXXBasePathElement &PathElement : Path) {
// If the path contains a virtual base class we can't give any hint.
// -1: no hint.
if (PathElement.Base->isVirtual())
return CharUnits::fromQuantity(-1ULL);
if (NumPublicPaths > 1) // Won't use offsets, skip computation.
continue;
// Accumulate the base class offsets.
const ASTRecordLayout &L = Context.getASTRecordLayout(PathElement.Class);
Offset += L.getBaseClassOffset(
PathElement.Base->getType()->getAsCXXRecordDecl());
}
}
// -2: Src is not a public base of Dst.
if (NumPublicPaths == 0)
return CharUnits::fromQuantity(-2ULL);
// -3: Src is a multiple public base type but never a virtual base type.
if (NumPublicPaths > 1)
return CharUnits::fromQuantity(-3ULL);
// Otherwise, the Src type is a unique public nonvirtual base type of Dst.
// Return the offset of Src from the origin of Dst.
return Offset;
}
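// Illustrative hint values (editorial sketch):
//   struct A {}; struct B : A {}; struct C : A {}; struct D : B, C {};
//   A -> D gives -3 (A is a public base along more than one path);
//   struct V : virtual A {}; A -> V gives -1 (a path crosses a virtual
//   base); unrelated types give -2; a unique public non-virtual base
//   gives its byte offset within Dst.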
static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) {
// void __cxa_bad_typeid();
llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false);
return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_typeid");
}
bool ItaniumCXXABI::shouldTypeidBeNullChecked(bool IsDeref,
QualType SrcRecordTy) {
return IsDeref;
}
void ItaniumCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) {
llvm::Value *Fn = getBadTypeidFn(CGF);
CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn();
CGF.Builder.CreateUnreachable();
}
llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF,
QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) {
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
llvm::Value *Value =
CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl);
// Load the type info.
Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL);
return CGF.Builder.CreateAlignedLoad(Value, CGF.getPointerAlign());
}
bool ItaniumCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) {
return SrcIsPtr;
}
llvm::Value *ItaniumCXXABI::EmitDynamicCastCall(
CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy,
QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) {
llvm::Type *PtrDiffLTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
llvm::Value *SrcRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType());
llvm::Value *DestRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType());
// Compute the offset hint.
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
const CXXRecordDecl *DestDecl = DestRecordTy->getAsCXXRecordDecl();
llvm::Value *OffsetHint = llvm::ConstantInt::get(
PtrDiffLTy,
computeOffsetHint(CGF.getContext(), SrcDecl, DestDecl).getQuantity());
// Emit the call to __dynamic_cast.
llvm::Value *Value = ThisAddr.getPointer();
Value = CGF.EmitCastToVoidPtr(Value);
llvm::Value *args[] = {Value, SrcRTTI, DestRTTI, OffsetHint};
Value = CGF.EmitNounwindRuntimeCall(getItaniumDynamicCastFn(CGF), args);
Value = CGF.Builder.CreateBitCast(Value, DestLTy);
/// C++ [expr.dynamic.cast]p9:
/// A failed cast to reference type throws std::bad_cast
if (DestTy->isReferenceType()) {
llvm::BasicBlock *BadCastBlock =
CGF.createBasicBlock("dynamic_cast.bad_cast");
llvm::Value *IsNull = CGF.Builder.CreateIsNull(Value);
CGF.Builder.CreateCondBr(IsNull, BadCastBlock, CastEnd);
CGF.EmitBlock(BadCastBlock);
EmitBadCastCall(CGF);
}
return Value;
}
llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF,
Address ThisAddr,
QualType SrcRecordTy,
QualType DestTy) {
llvm::Type *PtrDiffLTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
// Get the vtable pointer.
llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(),
ClassDecl);
// Get the offset-to-top from the vtable.
llvm::Value *OffsetToTop =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL);
OffsetToTop =
CGF.Builder.CreateAlignedLoad(OffsetToTop, CGF.getPointerAlign(),
"offset.to.top");
// Finally, add the offset to the pointer.
llvm::Value *Value = ThisAddr.getPointer();
Value = CGF.EmitCastToVoidPtr(Value);
Value = CGF.Builder.CreateInBoundsGEP(Value, OffsetToTop);
return CGF.Builder.CreateBitCast(Value, DestLTy);
}
bool ItaniumCXXABI::EmitBadCastCall(CodeGenFunction &CGF) {
llvm::Value *Fn = getBadCastFn(CGF);
CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn();
CGF.Builder.CreateUnreachable();
return true;
}
llvm::Value *
ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF,
Address This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) {
llvm::Value *VTablePtr = CGF.GetVTablePtr(This, CGM.Int8PtrTy, ClassDecl);
CharUnits VBaseOffsetOffset =
CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(ClassDecl,
BaseClassDecl);
llvm::Value *VBaseOffsetPtr =
CGF.Builder.CreateConstGEP1_64(VTablePtr, VBaseOffsetOffset.getQuantity(),
"vbase.offset.ptr");
VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr,
CGM.PtrDiffTy->getPointerTo());
llvm::Value *VBaseOffset =
CGF.Builder.CreateAlignedLoad(VBaseOffsetPtr, CGF.getPointerAlign(),
"vbase.offset");
return VBaseOffset;
}
void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
// Just make sure we're in sync with TargetCXXABI.
assert(CGM.getTarget().getCXXABI().hasConstructorVariants());
// The constructor used for constructing this as a base class;
// ignores virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Base));
// The constructor used for constructing this as a complete class;
// constructs the virtual bases, then calls the base constructor.
if (!D->getParent()->isAbstract()) {
// We don't need to emit the complete ctor if the class is abstract.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
}
}
CGCXXABI::AddedStructorArgs
ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
ASTContext &Context = getContext();
// All parameters are already in place except VTT, which goes after 'this'.
// These are Clang types, so we don't need to worry about sret yet.
// Check if we need to add a VTT parameter (which has type void **).
if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) {
ArgTys.insert(ArgTys.begin() + 1,
Context.getPointerType(Context.VoidPtrTy));
return AddedStructorArgs::prefix(1);
}
return AddedStructorArgs{};
}
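// Illustrative example with hypothetical types: given
//   struct A { }; struct B : virtual A { B(); };
// the base-object constructor variant of B::B() has a VTT parameter inserted
// right after 'this', effectively 'B::B(B *this, void **vtt)', while the
// complete-object variant keeps the unmodified signature.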
void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
// The destructor used for destructing this as a base class; ignores
// virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
// The destructor used for destructing this as a most-derived class;
// calls the base destructor and then destroys any virtual bases.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Complete));
// The destructor in a virtual table is always a 'deleting'
// destructor, which calls the complete destructor and then uses the
// appropriate operator delete.
if (D->isVirtual())
CGM.EmitGlobal(GlobalDecl(D, Dtor_Deleting));
}
void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
QualType &ResTy,
FunctionArgList &Params) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
// Check if we need a VTT parameter as well.
if (NeedsVTTParameter(CGF.CurGD)) {
ASTContext &Context = getContext();
// FIXME: avoid the fake decl
QualType T = Context.getPointerType(Context.VoidPtrTy);
auto *VTTDecl = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"),
T, ImplicitParamDecl::CXXVTT);
Params.insert(Params.begin() + 1, VTTDecl);
getStructorImplicitParamDecl(CGF) = VTTDecl;
}
}
void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
// Naked functions have no prolog.
if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>())
return;
/// Initialize the 'this' slot.
EmitThisParam(CGF);
/// Initialize the 'vtt' slot if needed.
if (getStructorImplicitParamDecl(CGF)) {
getStructorImplicitParamValue(CGF) = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)), "vtt");
}
/// If this is a function that the ABI specifies returns 'this', initialize
/// the return slot to 'this' at the start of the function.
///
/// Unlike the setting of return types, this is done within the ABI
/// implementation instead of by clients of CGCXXABI because:
/// 1) getThisValue is currently protected
/// 2) in theory, an ABI could implement 'this' returns some other way;
/// HasThisReturn only specifies a contract, not the implementation
if (HasThisReturn(CGF.CurGD))
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
}
CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
if (!NeedsVTTParameter(GlobalDecl(D, Type)))
return AddedStructorArgs{};
// Insert the implicit 'vtt' argument as the second argument.
llvm::Value *VTT =
CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
Args.insert(Args.begin() + 1,
CallArg(RValue::get(VTT), VTTTy, /*needscopy=*/false));
return AddedStructorArgs::prefix(1); // Added one arg.
}
void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) {
GlobalDecl GD(DD, Type);
llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
CGCallee Callee;
if (getContext().getLangOpts().AppleKext &&
Type != Dtor_Base && DD->isVirtual())
Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
else
Callee =
CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
DD);
CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(),
This.getPointer(), VTT, VTTTy,
nullptr, nullptr);
}
void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
llvm::GlobalVariable *VTable = getAddrOfVTable(RD, CharUnits());
if (VTable->hasInitializer())
return;
ItaniumVTableContext &VTContext = CGM.getItaniumVTableContext();
const VTableLayout &VTLayout = VTContext.getVTableLayout(RD);
llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
llvm::Constant *RTTI =
CGM.GetAddrOfRTTIDescriptor(CGM.getContext().getTagDeclType(RD));
// Create and set the initializer.
ConstantInitBuilder Builder(CGM);
auto Components = Builder.beginStruct();
CGVT.createVTableInitializer(Components, VTLayout, RTTI);
Components.finishAndSetAsInitializer(VTable);
// Set the correct linkage.
VTable->setLinkage(Linkage);
if (CGM.supportsCOMDAT() && VTable->isWeakForLinker())
VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName()));
// Set the right visibility.
CGM.setGlobalVisibility(VTable, RD);
// Use pointer alignment for the vtable. Otherwise we would align them based
// on the size of the initializer which doesn't make sense as only single
// values are read.
unsigned PAlign = CGM.getTarget().getPointerAlign(0);
VTable->setAlignment(getContext().toCharUnitsFromBits(PAlign).getQuantity());
// If this is the magic class __cxxabiv1::__fundamental_type_info,
// we will emit the typeinfo for the fundamental types. This is the
// same behaviour as GCC.
const DeclContext *DC = RD->getDeclContext();
if (RD->getIdentifier() &&
RD->getIdentifier()->isStr("__fundamental_type_info") &&
isa<NamespaceDecl>(DC) && cast<NamespaceDecl>(DC)->getIdentifier() &&
cast<NamespaceDecl>(DC)->getIdentifier()->isStr("__cxxabiv1") &&
DC->getParent()->isTranslationUnit())
EmitFundamentalRTTIDescriptors(RD->hasAttr<DLLExportAttr>());
if (!VTable->isDeclarationForLinker())
CGM.EmitVTableTypeMetadata(VTable, VTLayout);
}
bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) {
if (Vptr.NearestVBase == nullptr)
return false;
return NeedsVTTParameter(CGF.CurGD);
}
llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
const CXXRecordDecl *NearestVBase) {
if ((Base.getBase()->getNumVBases() || NearestVBase != nullptr) &&
NeedsVTTParameter(CGF.CurGD)) {
return getVTableAddressPointInStructorWithVTT(CGF, VTableClass, Base,
NearestVBase);
}
return getVTableAddressPoint(Base, VTableClass);
}
llvm::Constant *
ItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) {
llvm::GlobalValue *VTable = getAddrOfVTable(VTableClass, CharUnits());
// Find the appropriate vtable within the vtable group, and the address point
// within that vtable.
VTableLayout::AddressPointLocation AddressPoint =
CGM.getItaniumVTableContext()
.getVTableLayout(VTableClass)
.getAddressPoint(Base);
llvm::Value *Indices[] = {
llvm::ConstantInt::get(CGM.Int32Ty, 0),
llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint.VTableIndex),
llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint.AddressPointIndex),
};
return llvm::ConstantExpr::getGetElementPtr(VTable->getValueType(), VTable,
Indices, /*InBounds=*/true,
/*InRangeIndex=*/1);
}
llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructorWithVTT(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
const CXXRecordDecl *NearestVBase) {
assert((Base.getBase()->getNumVBases() || NearestVBase != nullptr) &&
NeedsVTTParameter(CGF.CurGD) && "This class doesn't have VTT");
// Get the secondary vpointer index.
uint64_t VirtualPointerIndex =
CGM.getVTables().getSecondaryVirtualPointerIndex(VTableClass, Base);
/// Load the VTT.
llvm::Value *VTT = CGF.LoadCXXVTT();
if (VirtualPointerIndex)
VTT = CGF.Builder.CreateConstInBoundsGEP1_64(VTT, VirtualPointerIndex);
// And load the address point from the VTT.
return CGF.Builder.CreateAlignedLoad(VTT, CGF.getPointerAlign());
}
llvm::Constant *ItaniumCXXABI::getVTableAddressPointForConstExpr(
BaseSubobject Base, const CXXRecordDecl *VTableClass) {
return getVTableAddressPoint(Base, VTableClass);
}
llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) {
assert(VPtrOffset.isZero() && "Itanium ABI only supports zero vptr offsets");
llvm::GlobalVariable *&VTable = VTables[RD];
if (VTable)
return VTable;
// Queue up this vtable for possible deferred emission.
CGM.addDeferredVTable(RD);
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
getMangleContext().mangleCXXVTable(RD, Out);
const VTableLayout &VTLayout =
CGM.getItaniumVTableContext().getVTableLayout(RD);
llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout);
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, VTableType, llvm::GlobalValue::ExternalLinkage);
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
else if (RD->hasAttr<DLLExportAttr>())
VTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
return VTable;
}
CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
GD = GD.getCanonicalDecl();
Ty = Ty->getPointerTo()->getPointerTo();
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent());
uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
llvm::Value *VFunc;
if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
VFunc = CGF.EmitVTableTypeCheckedLoad(
MethodDecl->getParent(), VTable,
VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
} else {
CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc);
llvm::Value *VFuncPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
auto *VFuncLoad =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
// Add !invariant.load metadata to the virtual function load to indicate
// that the function pointer didn't change inside the vtable.
// It's safe to add this even without -fstrict-vtable-pointers, but it only
// helps devirtualization when two loads of the same virtual function come
// from the same vtable load, which won't happen without devirtualization
// enabled by -fstrict-vtable-pointers.
if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
CGM.getCodeGenOpts().StrictVTablePointers)
VFuncLoad->setMetadata(
llvm::LLVMContext::MD_invariant_load,
llvm::MDNode::get(CGM.getLLVMContext(),
llvm::ArrayRef<llvm::Metadata *>()));
VFunc = VFuncLoad;
}
CGCallee Callee(MethodDecl, VFunc);
return Callee;
}
llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
Address This, const CXXMemberCallExpr *CE) {
assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, getFromDtorType(DtorType));
llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
CGCallee Callee =
getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty,
CE ? CE->getLocStart() : SourceLocation());
CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(),
This.getPointer(), /*ImplicitParam=*/nullptr,
QualType(), CE, nullptr);
return nullptr;
}
void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
CodeGenVTables &VTables = CGM.getVTables();
llvm::GlobalVariable *VTT = VTables.GetAddrOfVTT(RD);
VTables.EmitVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD);
}
bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
// We don't emit available_externally vtables if we are in -fapple-kext mode
// because kext mode does not permit devirtualization.
if (CGM.getLangOpts().AppleKext)
return false;
// If there are no inline virtual functions left unemitted and the vtable is
// not hidden, it is safe to emit an available_externally copy of the
// vtable.
// FIXME: we could still emit a copy of the vtable if we
// can emit definitions of the inline functions.
return !hasAnyUnusedVirtualInlineFunction(RD) && !isVTableHidden(RD);
}
static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
Address InitialPtr,
int64_t NonVirtualAdjustment,
int64_t VirtualAdjustment,
bool IsReturnAdjustment) {
if (!NonVirtualAdjustment && !VirtualAdjustment)
return InitialPtr.getPointer();
Address V = CGF.Builder.CreateElementBitCast(InitialPtr, CGF.Int8Ty);
// In a base-to-derived cast, the non-virtual adjustment is applied first.
if (NonVirtualAdjustment && !IsReturnAdjustment) {
V = CGF.Builder.CreateConstInBoundsByteGEP(V,
CharUnits::fromQuantity(NonVirtualAdjustment));
}
// Perform the virtual adjustment if we have one.
llvm::Value *ResultPtr;
if (VirtualAdjustment) {
llvm::Type *PtrDiffTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
Address VTablePtrPtr = CGF.Builder.CreateElementBitCast(V, CGF.Int8PtrTy);
llvm::Value *VTablePtr = CGF.Builder.CreateLoad(VTablePtrPtr);
llvm::Value *OffsetPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTablePtr, VirtualAdjustment);
OffsetPtr = CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo());
// Load the adjustment offset from the vtable.
llvm::Value *Offset =
CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign());
// Adjust our pointer.
ResultPtr = CGF.Builder.CreateInBoundsGEP(V.getPointer(), Offset);
} else {
ResultPtr = V.getPointer();
}
// In a derived-to-base conversion, the non-virtual adjustment is
// applied second.
if (NonVirtualAdjustment && IsReturnAdjustment) {
ResultPtr = CGF.Builder.CreateConstInBoundsGEP1_64(ResultPtr,
NonVirtualAdjustment);
}
// Cast back to the original type.
return CGF.Builder.CreateBitCast(ResultPtr, InitialPtr.getType());
}
llvm::Value *ItaniumCXXABI::performThisAdjustment(CodeGenFunction &CGF,
Address This,
const ThisAdjustment &TA) {
return performTypeAdjustment(CGF, This, TA.NonVirtual,
TA.Virtual.Itanium.VCallOffsetOffset,
/*IsReturnAdjustment=*/false);
}
llvm::Value *
ItaniumCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) {
return performTypeAdjustment(CGF, Ret, RA.NonVirtual,
RA.Virtual.Itanium.VBaseOffsetOffset,
/*IsReturnAdjustment=*/true);
}
void ARMCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
RValue RV, QualType ResultType) {
if (!isa<CXXDestructorDecl>(CGF.CurGD.getDecl()))
return ItaniumCXXABI::EmitReturnFromThunk(CGF, RV, ResultType);
// Destructor thunks in the ARM ABI have indeterminate results.
llvm::Type *T = CGF.ReturnValue.getElementType();
RValue Undef = RValue::get(llvm::UndefValue::get(T));
return ItaniumCXXABI::EmitReturnFromThunk(CGF, Undef, ResultType);
}
/************************** Array allocation cookies **************************/
CharUnits ItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) {
// The array cookie is a size_t; pad that up to the element alignment.
// The cookie is actually right-justified in that space.
return std::max(CharUnits::fromQuantity(CGM.SizeSizeInBytes),
CGM.getContext().getTypeAlignInChars(elementType));
}
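// Worked example (LP64, hypothetical types): for 'new double[n]' the cookie
// is max(sizeof(size_t), alignof(double)) = 8 bytes, holding the element
// count. For an over-aligned element such as 'struct alignas(16) V { ... }',
// the cookie grows to 16 bytes with the count right-justified in the last
// 8 bytes, so the array data that follows stays suitably aligned.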
Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) {
assert(requiresArrayCookie(expr));
unsigned AS = NewPtr.getAddressSpace();
ASTContext &Ctx = getContext();
CharUnits SizeSize = CGF.getSizeSize();
// The size of the cookie.
CharUnits CookieSize =
std::max(SizeSize, Ctx.getTypeAlignInChars(ElementType));
assert(CookieSize == getArrayCookieSizeImpl(ElementType));
// Compute an offset to the cookie.
Address CookiePtr = NewPtr;
CharUnits CookieOffset = CookieSize - SizeSize;
if (!CookieOffset.isZero())
CookiePtr = CGF.Builder.CreateConstInBoundsByteGEP(CookiePtr, CookieOffset);
// Write the number of elements into the appropriate slot.
Address NumElementsPtr =
CGF.Builder.CreateElementBitCast(CookiePtr, CGF.SizeTy);
llvm::Instruction *SI = CGF.Builder.CreateStore(NumElements, NumElementsPtr);
// Handle the array cookie specially in ASan.
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 &&
expr->getOperatorNew()->isReplaceableGlobalAllocationFunction()) {
// The store to the CookiePtr does not need to be instrumented.
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, NumElementsPtr.getType(), false);
llvm::Constant *F =
CGM.CreateRuntimeFunction(FTy, "__asan_poison_cxx_array_cookie");
CGF.Builder.CreateCall(F, NumElementsPtr.getPointer());
}
// Finally, compute a pointer to the actual data buffer by skipping
// over the cookie completely.
return CGF.Builder.CreateConstInBoundsByteGEP(NewPtr, CookieSize);
}
llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) {
// The element count is right-justified in the cookie.
Address numElementsPtr = allocPtr;
CharUnits numElementsOffset = cookieSize - CGF.getSizeSize();
if (!numElementsOffset.isZero())
numElementsPtr =
CGF.Builder.CreateConstInBoundsByteGEP(numElementsPtr, numElementsOffset);
unsigned AS = allocPtr.getAddressSpace();
numElementsPtr = CGF.Builder.CreateElementBitCast(numElementsPtr, CGF.SizeTy);
if (!CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) || AS != 0)
return CGF.Builder.CreateLoad(numElementsPtr);
// In asan mode emit a function call instead of a regular load and let the
// run-time deal with it: if the shadow is properly poisoned return the
// cookie, otherwise return 0 to avoid an infinite loop calling DTORs.
// We can't simply ignore this load using nosanitize metadata because
// the metadata may be lost.
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGF.SizeTy, CGF.SizeTy->getPointerTo(0), false);
llvm::Constant *F =
CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie");
return CGF.Builder.CreateCall(F, numElementsPtr.getPointer());
}
CharUnits ARMCXXABI::getArrayCookieSizeImpl(QualType elementType) {
// ARM says that the cookie is always:
// struct array_cookie {
// std::size_t element_size; // element_size != 0
// std::size_t element_count;
// };
// But the base ABI doesn't give anything an alignment greater than
// 8, so we can dismiss this as typical ABI-author blindness to
// actual language complexity and round up to the element alignment.
return std::max(CharUnits::fromQuantity(2 * CGM.SizeSizeInBytes),
CGM.getContext().getTypeAlignInChars(elementType));
}
Address ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address newPtr,
llvm::Value *numElements,
const CXXNewExpr *expr,
QualType elementType) {
assert(requiresArrayCookie(expr));
// The cookie is always at the start of the buffer.
Address cookie = newPtr;
// The first element is the element size.
cookie = CGF.Builder.CreateElementBitCast(cookie, CGF.SizeTy);
llvm::Value *elementSize = llvm::ConstantInt::get(CGF.SizeTy,
getContext().getTypeSizeInChars(elementType).getQuantity());
CGF.Builder.CreateStore(elementSize, cookie);
// The second element is the element count.
cookie = CGF.Builder.CreateConstInBoundsGEP(cookie, 1, CGF.getSizeSize());
CGF.Builder.CreateStore(numElements, cookie);
// Finally, compute a pointer to the actual data buffer by skipping
// over the cookie completely.
CharUnits cookieSize = ARMCXXABI::getArrayCookieSizeImpl(elementType);
return CGF.Builder.CreateConstInBoundsByteGEP(newPtr, cookieSize);
}
llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) {
// The number of elements is at offset sizeof(size_t) relative to
// the allocated pointer.
Address numElementsPtr
= CGF.Builder.CreateConstInBoundsByteGEP(allocPtr, CGF.getSizeSize());
numElementsPtr = CGF.Builder.CreateElementBitCast(numElementsPtr, CGF.SizeTy);
return CGF.Builder.CreateLoad(numElementsPtr);
}
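// Example: under this ABI, 'new T[n]' lays the cookie out at the start of
// the allocation as { size_t element_size; size_t element_count; }, so the
// count read above always sits at offset sizeof(size_t) (4 bytes on AArch32,
// 8 on AArch64) regardless of the element type's alignment.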
/*********************** Static local initialization **************************/
static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM,
llvm::PointerType *GuardPtrTy) {
// int __cxa_guard_acquire(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy),
GuardPtrTy, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "__cxa_guard_acquire",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM,
llvm::PointerType *GuardPtrTy) {
// void __cxa_guard_release(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "__cxa_guard_release",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind));
}
static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM,
llvm::PointerType *GuardPtrTy) {
// void __cxa_guard_abort(__guard *guard_object);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "__cxa_guard_abort",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind));
}
namespace {
struct CallGuardAbort final : EHScopeStack::Cleanup {
llvm::GlobalVariable *Guard;
CallGuardAbort(llvm::GlobalVariable *Guard) : Guard(Guard) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitNounwindRuntimeCall(getGuardAbortFn(CGF.CGM, Guard->getType()),
Guard);
}
};
}
/// The ARM code here follows the Itanium code closely enough that we
/// just special-case it at particular places.
void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
const VarDecl &D,
llvm::GlobalVariable *var,
bool shouldPerformInit) {
CGBuilderTy &Builder = CGF.Builder;
// Inline variables that weren't instantiated from variable templates have
// partially-ordered initialization within their translation unit.
bool NonTemplateInline =
D.isInline() &&
!isTemplateInstantiation(D.getTemplateSpecializationKind());
// We only need to use thread-safe statics for local non-TLS variables and
// inline variables; other global initialization is always single-threaded
// or (through lazy dynamic loading in multiple threads) unsequenced.
bool threadsafe = getContext().getLangOpts().ThreadsafeStatics &&
(D.isLocalVarDecl() || NonTemplateInline) &&
!D.getTLSKind();
// If we have a global variable with internal linkage and thread-safe statics
// are disabled, we can just let the guard variable be of type i8.
bool useInt8GuardVariable = !threadsafe && var->hasInternalLinkage();
llvm::IntegerType *guardTy;
CharUnits guardAlignment;
if (useInt8GuardVariable) {
guardTy = CGF.Int8Ty;
guardAlignment = CharUnits::One();
} else {
// Guard variables are 64 bits in the generic ABI and size width on ARM
// (i.e. 32-bit on AArch32, 64-bit on AArch64).
if (UseARMGuardVarABI) {
guardTy = CGF.SizeTy;
guardAlignment = CGF.getSizeAlign();
} else {
guardTy = CGF.Int64Ty;
guardAlignment = CharUnits::fromQuantity(
CGM.getDataLayout().getABITypeAlignment(guardTy));
}
}
llvm::PointerType *guardPtrTy = guardTy->getPointerTo();
// Create the guard variable if we don't already have it (as we
// might if we're double-emitting this function body).
llvm::GlobalVariable *guard = CGM.getStaticLocalDeclGuardAddress(&D);
if (!guard) {
// Mangle the name for the guard.
SmallString<256> guardName;
{
llvm::raw_svector_ostream out(guardName);
getMangleContext().mangleStaticGuardVariable(&D, out);
}
// Create the guard variable with a zero-initializer.
// Just absorb linkage and visibility from the guarded variable.
guard = new llvm::GlobalVariable(CGM.getModule(), guardTy,
false, var->getLinkage(),
llvm::ConstantInt::get(guardTy, 0),
guardName.str());
guard->setVisibility(var->getVisibility());
// If the variable is thread-local, so is its guard variable.
guard->setThreadLocalMode(var->getThreadLocalMode());
guard->setAlignment(guardAlignment.getQuantity());
// The ABI says: "It is suggested that it be emitted in the same COMDAT
// group as the associated data object." In practice, this doesn't work for
// non-ELF and non-Wasm object formats, so only do it for ELF and Wasm.
llvm::Comdat *C = var->getComdat();
if (!D.isLocalVarDecl() && C &&
(CGM.getTarget().getTriple().isOSBinFormatELF() ||
CGM.getTarget().getTriple().isOSBinFormatWasm())) {
guard->setComdat(C);
// An inline variable's guard function is run from the per-TU
// initialization function, not via a dedicated global ctor function, so
// we can't put it in a comdat.
if (!NonTemplateInline)
CGF.CurFn->setComdat(C);
} else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) {
guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName()));
}
CGM.setStaticLocalDeclGuardAddress(&D, guard);
}
Address guardAddr = Address(guard, guardAlignment);
// Test whether the variable has completed initialization.
//
// Itanium C++ ABI 3.3.2:
// The following is pseudo-code showing how these functions can be used:
// if (obj_guard.first_byte == 0) {
// if ( __cxa_guard_acquire (&obj_guard) ) {
// try {
// ... initialize the object ...;
// } catch (...) {
// __cxa_guard_abort (&obj_guard);
// throw;
// }
// ... queue object destructor with __cxa_atexit() ...;
// __cxa_guard_release (&obj_guard);
// }
// }
// Load the first byte of the guard variable.
llvm::LoadInst *LI =
Builder.CreateLoad(Builder.CreateElementBitCast(guardAddr, CGM.Int8Ty));
// Itanium ABI:
// An implementation supporting thread-safety on multiprocessor
// systems must also guarantee that references to the initialized
// object do not occur before the load of the initialization flag.
//
// In LLVM, we do this by marking the load Acquire.
if (threadsafe)
LI->setAtomic(llvm::AtomicOrdering::Acquire);
// For ARM, we should only check the first bit, rather than the entire byte:
//
// ARM C++ ABI 3.2.3.1:
// To support the potential use of initialization guard variables
// as semaphores that are the target of ARM SWP and LDREX/STREX
// synchronizing instructions we define a static initialization
// guard variable to be a 4-byte aligned, 4-byte word with the
// following inline access protocol.
// #define INITIALIZED 1
// if ((obj_guard & INITIALIZED) != INITIALIZED) {
// if (__cxa_guard_acquire(&obj_guard))
// ...
// }
//
// and similarly for ARM64:
//
// ARM64 C++ ABI 3.2.2:
// This ABI instead only specifies the value bit 0 of the static guard
// variable; all other bits are platform defined. Bit 0 shall be 0 when the
// variable is not initialized and 1 when it is.
llvm::Value *V =
(UseARMGuardVarABI && !useInt8GuardVariable)
? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
: LI;
llvm::Value *isUninitialized = Builder.CreateIsNull(V, "guard.uninitialized");
llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
// Check if the first byte of the guard variable is zero.
Builder.CreateCondBr(isUninitialized, InitCheckBlock, EndBlock);
CGF.EmitBlock(InitCheckBlock);
// Variables used when coping with thread-safe statics and exceptions.
if (threadsafe) {
// Call __cxa_guard_acquire.
llvm::Value *V
= CGF.EmitNounwindRuntimeCall(getGuardAcquireFn(CGM, guardPtrTy), guard);
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
Builder.CreateCondBr(Builder.CreateIsNotNull(V, "tobool"),
InitBlock, EndBlock);
// Call __cxa_guard_abort along the exceptional edge.
CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard);
CGF.EmitBlock(InitBlock);
}
// Emit the initializer and add a global destructor if appropriate.
CGF.EmitCXXGlobalVarDeclInit(D, var, shouldPerformInit);
if (threadsafe) {
// Pop the guard-abort cleanup if we pushed one.
CGF.PopCleanupBlock();
// Call __cxa_guard_release. This cannot throw.
CGF.EmitNounwindRuntimeCall(getGuardReleaseFn(CGM, guardPtrTy),
guardAddr.getPointer());
} else {
Builder.CreateStore(llvm::ConstantInt::get(guardTy, 1), guardAddr);
}
CGF.EmitBlock(EndBlock);
}
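// End-to-end sketch of what the thread-safe path above emits for a static
// local ('Widget' is a hypothetical type, used purely for illustration):
//
//   static Widget w;       // source
//   // lowers to roughly:
//   if (load-acquire(guard.first_byte) == 0) {
//     if (__cxa_guard_acquire(&guard)) {
//       Widget::Widget(&w);                                // may throw
//       __cxa_atexit(&Widget::~Widget, &w, &__dso_handle); // queue dtor
//       __cxa_guard_release(&guard);
//     } // __cxa_guard_abort(&guard) runs on the exceptional edge
//   }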
/// Register a global destructor using __cxa_atexit.
static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
llvm::Constant *dtor,
llvm::Constant *addr,
bool TLS) {
const char *Name = "__cxa_atexit";
if (TLS) {
const llvm::Triple &T = CGF.getTarget().getTriple();
Name = T.isOSDarwin() ? "_tlv_atexit" : "__cxa_thread_atexit";
}
// We're assuming that the destructor function is something we can
// reasonably call with the default CC. Go ahead and cast it to the
// right prototype.
llvm::Type *dtorTy =
llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo();
// extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
llvm::Type *paramTys[] = { dtorTy, CGF.Int8PtrTy, CGF.Int8PtrTy };
llvm::FunctionType *atexitTy =
llvm::FunctionType::get(CGF.IntTy, paramTys, false);
// Fetch the actual function.
llvm::Constant *atexit = CGF.CGM.CreateRuntimeFunction(atexitTy, Name);
if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit))
fn->setDoesNotThrow();
// Create a variable that binds the atexit to this shared object.
llvm::Constant *handle =
CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
llvm::Value *args[] = {
llvm::ConstantExpr::getBitCast(dtor, dtorTy),
llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy),
handle
};
CGF.EmitNounwindRuntimeCall(atexit, args);
}
/// Register a global destructor as best as we know how.
void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF,
const VarDecl &D,
llvm::Constant *dtor,
llvm::Constant *addr) {
// Use __cxa_atexit if available.
if (CGM.getCodeGenOpts().CXAAtExit)
return emitGlobalDtorWithCXAAtExit(CGF, dtor, addr, D.getTLSKind());
if (D.getTLSKind())
CGM.ErrorUnsupported(&D, "non-trivial TLS destruction");
// In Apple kexts, we want to add a global destructor entry.
// FIXME: shouldn't this be guarded by some variable?
if (CGM.getLangOpts().AppleKext) {
// Generate a global destructor entry.
return CGM.AddCXXDtorEntry(dtor, addr);
}
CGF.registerGlobalDtorWithAtExit(D, dtor, addr);
}
static bool isThreadWrapperReplaceable(const VarDecl *VD,
CodeGen::CodeGenModule &CGM) {
assert(!VD->isStaticLocal() && "static local VarDecls don't need wrappers!");
// Darwin prefers references to thread local variables to go through
// the thread wrapper instead of directly referencing the backing variable.
return VD->getTLSKind() == VarDecl::TLS_Dynamic &&
CGM.getTarget().getTriple().isOSDarwin();
}
/// Get the appropriate linkage for the wrapper function. This is essentially
/// the weak form of the variable's linkage; every translation unit which needs
/// the wrapper emits a copy, and we want the linker to merge them.
static llvm::GlobalValue::LinkageTypes
getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) {
llvm::GlobalValue::LinkageTypes VarLinkage =
CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false);
// For internal linkage variables, we don't need an external or weak wrapper.
if (llvm::GlobalValue::isLocalLinkage(VarLinkage))
return VarLinkage;
// If the thread wrapper is replaceable, give it appropriate linkage.
if (isThreadWrapperReplaceable(VD, CGM))
if (!llvm::GlobalVariable::isLinkOnceLinkage(VarLinkage) &&
!llvm::GlobalVariable::isWeakODRLinkage(VarLinkage))
return VarLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
llvm::Function *
ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::Value *Val) {
// Mangle the name for the thread_local wrapper function.
SmallString<256> WrapperName;
{
llvm::raw_svector_ostream Out(WrapperName);
getMangleContext().mangleItaniumThreadLocalWrapper(VD, Out);
}
// FIXME: If VD is a definition, we should regenerate the function attributes
// before returning.
if (llvm::Value *V = CGM.getModule().getNamedValue(WrapperName))
return cast<llvm::Function>(V);
QualType RetQT = VD->getType();
if (RetQT->isReferenceType())
RetQT = RetQT.getNonReferenceType();
const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
getContext().getPointerType(RetQT), FunctionArgList());
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Wrapper =
llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM),
WrapperName.str(), &CGM.getModule());
CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper);
if (VD->hasDefinition())
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
// Always resolve references to the wrapper at link time.
if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) &&
!llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) &&
!llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage())))
Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility);
if (isThreadWrapperReplaceable(VD, CGM)) {
Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
}
return Wrapper;
}
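// Example: for a hypothetical 'thread_local int x;' with external linkage,
// uses of 'x' from other translation units are lowered to calls to this
// wrapper (mangled '_ZTW1x'), which runs any dynamic initialization on
// first use and returns the address of x.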
void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CodeGenModule &CGM, ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) {
llvm::Function *InitFunc = nullptr;
// Separate initializers into those with ordered (or partially-ordered)
// initialization and those with unordered initialization.
llvm::SmallVector<llvm::Function *, 8> OrderedInits;
llvm::SmallDenseMap<const VarDecl *, llvm::Function *> UnorderedInits;
for (unsigned I = 0; I != CXXThreadLocalInits.size(); ++I) {
if (isTemplateInstantiation(
CXXThreadLocalInitVars[I]->getTemplateSpecializationKind()))
UnorderedInits[CXXThreadLocalInitVars[I]->getCanonicalDecl()] =
CXXThreadLocalInits[I];
else
OrderedInits.push_back(CXXThreadLocalInits[I]);
}
if (!OrderedInits.empty()) {
// Generate a guarded initialization function.
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
InitFunc = CGM.CreateGlobalInitOrDestructFunction(FTy, "__tls_init", FI,
SourceLocation(),
/*TLS=*/true);
llvm::GlobalVariable *Guard = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
llvm::GlobalVariable::InternalLinkage,
llvm::ConstantInt::get(CGM.Int8Ty, 0), "__tls_guard");
Guard->setThreadLocal(true);
CharUnits GuardAlign = CharUnits::One();
Guard->setAlignment(GuardAlign.getQuantity());
CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits,
Address(Guard, GuardAlign));
// On Darwin platforms, use CXX_FAST_TLS calling convention.
if (CGM.getTarget().getTriple().isOSDarwin()) {
InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
InitFunc->addFnAttr(llvm::Attribute::NoUnwind);
}
}
// Emit thread wrappers.
for (const VarDecl *VD : CXXThreadLocals) {
llvm::GlobalVariable *Var =
cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD)));
llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var);
// Some targets require that all accesses to thread local variables go through
// the thread wrapper. This means that we cannot attempt to create a thread
// wrapper or a thread helper.
if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) {
Wrapper->setLinkage(llvm::Function::ExternalLinkage);
continue;
}
// Mangle the name for the thread_local initialization function.
SmallString<256> InitFnName;
{
llvm::raw_svector_ostream Out(InitFnName);
getMangleContext().mangleItaniumThreadLocalInit(VD, Out);
}
// If we have a definition for the variable, emit the initialization
// function as an alias to the global Init function (if any). Otherwise,
// produce a declaration of the initialization function.
llvm::GlobalValue *Init = nullptr;
bool InitIsInitFunc = false;
if (VD->hasDefinition()) {
InitIsInitFunc = true;
llvm::Function *InitFuncToUse = InitFunc;
if (isTemplateInstantiation(VD->getTemplateSpecializationKind()))
InitFuncToUse = UnorderedInits.lookup(VD->getCanonicalDecl());
if (InitFuncToUse)
Init = llvm::GlobalAlias::create(Var->getLinkage(), InitFnName.str(),
InitFuncToUse);
} else {
// Emit a weak global function referring to the initialization function.
// This function will not exist if the TU defining the thread_local
// variable in question does not need any dynamic initialization for
// its thread_local variables.
llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, false);
Init = llvm::Function::Create(FnTy,
llvm::GlobalVariable::ExternalWeakLinkage,
InitFnName.str(), &CGM.getModule());
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init));
}
if (Init)
Init->setVisibility(Var->getVisibility());
llvm::LLVMContext &Context = CGM.getModule().getContext();
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
CGBuilderTy Builder(CGM, Entry);
if (InitIsInitFunc) {
if (Init) {
llvm::CallInst *CallVal = Builder.CreateCall(Init);
if (isThreadWrapperReplaceable(VD, CGM))
CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
}
} else {
// We don't know whether we have an init function; call it if it exists.
llvm::Value *Have = Builder.CreateIsNotNull(Init);
llvm::BasicBlock *InitBB = llvm::BasicBlock::Create(Context, "", Wrapper);
llvm::BasicBlock *ExitBB = llvm::BasicBlock::Create(Context, "", Wrapper);
Builder.CreateCondBr(Have, InitBB, ExitBB);
Builder.SetInsertPoint(InitBB);
Builder.CreateCall(Init);
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(ExitBB);
}
// For a reference, the result of the wrapper function is a pointer to
// the referenced object.
llvm::Value *Val = Var;
if (VD->getType()->isReferenceType()) {
CharUnits Align = CGM.getContext().getDeclAlign(VD);
Val = Builder.CreateAlignedLoad(Val, Align);
}
if (Val->getType() != Wrapper->getReturnType())
Val = Builder.CreatePointerBitCastOrAddrSpaceCast(
Val, Wrapper->getReturnType(), "");
Builder.CreateRet(Val);
}
}
LValue ItaniumCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD,
QualType LValType) {
llvm::Value *Val = CGF.CGM.GetAddrOfGlobalVar(VD);
llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Val);
llvm::CallInst *CallVal = CGF.Builder.CreateCall(Wrapper);
CallVal->setCallingConv(Wrapper->getCallingConv());
LValue LV;
if (VD->getType()->isReferenceType())
LV = CGF.MakeNaturalAlignAddrLValue(CallVal, LValType);
else
LV = CGF.MakeAddrLValue(CallVal, LValType,
CGF.getContext().getDeclAlign(VD));
// FIXME: need setObjCGCLValueClass?
return LV;
}
/// Return whether the given global decl needs a VTT parameter, which it does
/// if it's a base constructor or destructor with virtual bases.
bool ItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
// If we don't have any virtual bases, just return early.
if (!MD->getParent()->getNumVBases())
return false;
// Check if we have a base constructor.
if (isa<CXXConstructorDecl>(MD) && GD.getCtorType() == Ctor_Base)
return true;
// Check if we have a base destructor.
if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
return true;
return false;
}
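// Example with hypothetical types: for
//   struct A { }; struct D : virtual A { D(); ~D(); };
// the base-object constructor (C2) and base-object destructor (D2) need a
// VTT parameter because D has a virtual base, while the complete-object
// variants (C1/D1) and the deleting destructor (D0) do not.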
namespace {
class ItaniumRTTIBuilder {
CodeGenModule &CGM; // Per-module state.
llvm::LLVMContext &VMContext;
const ItaniumCXXABI &CXXABI; // Per-module state.
/// Fields - The fields of the RTTI descriptor currently being built.
SmallVector<llvm::Constant *, 16> Fields;
/// GetAddrOfTypeName - Returns the mangled type name of the given type.
llvm::GlobalVariable *
GetAddrOfTypeName(QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage);
/// GetAddrOfExternalRTTIDescriptor - Returns the constant for the RTTI
/// descriptor of the given type.
llvm::Constant *GetAddrOfExternalRTTIDescriptor(QualType Ty);
/// BuildVTablePointer - Build the vtable pointer for the given type.
void BuildVTablePointer(const Type *Ty);
/// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single
/// inheritance, according to the Itanium C++ ABI, 2.9.5p6b.
void BuildSIClassTypeInfo(const CXXRecordDecl *RD);
/// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for
/// classes with bases that do not satisfy the abi::__si_class_type_info
/// constraints, according to the Itanium C++ ABI, 2.9.5p5c.
void BuildVMIClassTypeInfo(const CXXRecordDecl *RD);
/// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct, used
/// for pointer types.
void BuildPointerTypeInfo(QualType PointeeTy);
/// BuildObjCObjectTypeInfo - Build the appropriate kind of
/// type_info for an object type.
void BuildObjCObjectTypeInfo(const ObjCObjectType *Ty);
/// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info
/// struct, used for member pointer types.
void BuildPointerToMemberTypeInfo(const MemberPointerType *Ty);
public:
ItaniumRTTIBuilder(const ItaniumCXXABI &ABI)
: CGM(ABI.CGM), VMContext(CGM.getModule().getContext()), CXXABI(ABI) {}
// Pointer type info flags.
enum {
/// PTI_Const - Type has const qualifier.
PTI_Const = 0x1,
/// PTI_Volatile - Type has volatile qualifier.
PTI_Volatile = 0x2,
/// PTI_Restrict - Type has restrict qualifier.
PTI_Restrict = 0x4,
/// PTI_Incomplete - Type is incomplete.
PTI_Incomplete = 0x8,
/// PTI_ContainingClassIncomplete - Containing class is incomplete.
/// (in pointer to member).
PTI_ContainingClassIncomplete = 0x10,
/// PTI_TransactionSafe - Pointee is transaction_safe function (C++ TM TS).
//PTI_TransactionSafe = 0x20,
/// PTI_Noexcept - Pointee is noexcept function (C++1z).
PTI_Noexcept = 0x40,
};
// VMI type info flags.
enum {
/// VMI_NonDiamondRepeat - Class has non-diamond repeated inheritance.
VMI_NonDiamondRepeat = 0x1,
/// VMI_DiamondShaped - Class is diamond shaped.
VMI_DiamondShaped = 0x2
};
// Base class type info flags.
enum {
/// BCTI_Virtual - Base class is virtual.
BCTI_Virtual = 0x1,
/// BCTI_Public - Base class is public.
BCTI_Public = 0x2
};
/// BuildTypeInfo - Build the RTTI type info struct for the given type.
///
/// \param Force - true to force the creation of this RTTI value
/// \param DLLExport - true to mark the RTTI value as DLLExport
llvm::Constant *BuildTypeInfo(QualType Ty, bool Force = false,
bool DLLExport = false);
};
}
llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName(
QualType Ty, llvm::GlobalVariable::LinkageTypes Linkage) {
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
CGM.getCXXABI().getMangleContext().mangleCXXRTTIName(Ty, Out);
// We know that the mangled name of the type starts at index 4 of the
// mangled name of the typename, so we can just index into it in order to
// get the mangled name of the type.
llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext,
Name.substr(4));
llvm::GlobalVariable *GV =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, Init->getType(), Linkage);
GV->setInitializer(Init);
return GV;
}
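// Example: for a hypothetical 'struct A', mangleCXXRTTIName produces
// "_ZTS1A"; Name.substr(4) strips the 4-character "_ZTS" prefix, so the
// string stored in the variable is exactly the mangled type name "1A".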
llvm::Constant *
ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
// Mangle the RTTI name.
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out);
// Look for an existing global.
llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name);
if (!GV) {
// Create a new global variable.
// Note for the future: if we ever want to do deferred emission of
// RTTI, check whether emitting vtables opportunistically needs any
// adjustment.
GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
/*Constant=*/true,
llvm::GlobalValue::ExternalLinkage, nullptr,
Name);
if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
if (RD->hasAttr<DLLImportAttr>())
GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
}
}
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
}
/// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type
/// info for that type is defined in the standard library.
static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
// Itanium C++ ABI 2.9.2:
// Basic type information (e.g. for "int", "bool", etc.) will be kept in
// the run-time support library. Specifically, the run-time support
// library should contain type_info objects for the types X, X* and
// X const*, for every X in: void, std::nullptr_t, bool, wchar_t, char,
// unsigned char, signed char, short, unsigned short, int, unsigned int,
// long, unsigned long, long long, unsigned long long, float, double,
// long double, char16_t, char32_t, and the IEEE 754r decimal and
// half-precision floating point types.
//
// GCC also emits RTTI for __int128.
// FIXME: We do not emit RTTI information for decimal types here.
// Types added here must also be added to EmitFundamentalRTTIDescriptors.
switch (Ty->getKind()) {
case BuiltinType::Void:
case BuiltinType::NullPtr:
case BuiltinType::Bool:
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Char_U:
case BuiltinType::Char_S:
case BuiltinType::UChar:
case BuiltinType::SChar:
case BuiltinType::Short:
case BuiltinType::UShort:
case BuiltinType::Int:
case BuiltinType::UInt:
case BuiltinType::Long:
case BuiltinType::ULong:
case BuiltinType::LongLong:
case BuiltinType::ULongLong:
case BuiltinType::Half:
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
case BuiltinType::Float128:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
case BuiltinType::UInt128:
return true;
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
return false;
case BuiltinType::Dependent:
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) \
case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
llvm_unreachable("asking for RRTI for a placeholder type!");
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
llvm_unreachable("FIXME: Objective-C types are unsupported!");
}
llvm_unreachable("Invalid BuiltinType Kind!");
}
static bool TypeInfoIsInStandardLibrary(const PointerType *PointerTy) {
QualType PointeeTy = PointerTy->getPointeeType();
const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(PointeeTy);
if (!BuiltinTy)
return false;
// Check the qualifiers.
Qualifiers Quals = PointeeTy.getQualifiers();
Quals.removeConst();
if (!Quals.empty())
return false;
return TypeInfoIsInStandardLibrary(BuiltinTy);
}
/// IsStandardLibraryRTTIDescriptor - Returns whether the type
/// information for the given type exists in the standard library.
static bool IsStandardLibraryRTTIDescriptor(QualType Ty) {
// Type info for builtin types is defined in the standard library.
if (const BuiltinType *BuiltinTy = dyn_cast<BuiltinType>(Ty))
return TypeInfoIsInStandardLibrary(BuiltinTy);
// Type info for some pointer types to builtin types is defined in the
// standard library.
if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty))
return TypeInfoIsInStandardLibrary(PointerTy);
return false;
}
/// ShouldUseExternalRTTIDescriptor - Returns whether the type information for
/// the given type exists somewhere else, and that we should not emit the type
/// information in this translation unit. Assumes that it is not a
/// standard-library type.
static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM,
QualType Ty) {
ASTContext &Context = CGM.getContext();
// If RTTI is disabled, assume it might be disabled in the
// translation unit that defines any potential key function, too.
if (!Context.getLangOpts().RTTI) return false;
if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
if (!RD->hasDefinition())
return false;
if (!RD->isDynamicClass())
return false;
// FIXME: this may need to be reconsidered if the key function
// changes.
// N.B. We must always emit the RTTI data ourselves if there exists a key
// function.
bool IsDLLImport = RD->hasAttr<DLLImportAttr>();
if (CGM.getVTables().isVTableExternal(RD))
return !IsDLLImport || CGM.getTriple().isWindowsItaniumEnvironment();
if (IsDLLImport)
return true;
}
return false;
}
/// IsIncompleteClassType - Returns whether the given record type is incomplete.
static bool IsIncompleteClassType(const RecordType *RecordTy) {
return !RecordTy->getDecl()->isCompleteDefinition();
}
/// ContainsIncompleteClassType - Returns whether the given type contains an
/// incomplete class type. This is true if
///
/// * The given type is an incomplete class type.
/// * The given type is a pointer type whose pointee type contains an
/// incomplete class type.
/// * The given type is a member pointer type whose class is an incomplete
/// class type.
/// * The given type is a member pointer type whose pointee type contains an
/// incomplete class type.
static bool ContainsIncompleteClassType(QualType Ty) {
if (const RecordType *RecordTy = dyn_cast<RecordType>(Ty)) {
if (IsIncompleteClassType(RecordTy))
return true;
}
if (const PointerType *PointerTy = dyn_cast<PointerType>(Ty))
return ContainsIncompleteClassType(PointerTy->getPointeeType());
if (const MemberPointerType *MemberPointerTy =
dyn_cast<MemberPointerType>(Ty)) {
// Check if the class type is incomplete.
const RecordType *ClassType = cast<RecordType>(MemberPointerTy->getClass());
if (IsIncompleteClassType(ClassType))
return true;
return ContainsIncompleteClassType(MemberPointerTy->getPointeeType());
}
return false;
}
// CanUseSingleInheritance - Return whether the given record decl has a "single,
// public, non-virtual base at offset zero (i.e. the derived class is dynamic
// iff the base is)", according to the Itanium C++ ABI, 2.9.5p6b.
static bool CanUseSingleInheritance(const CXXRecordDecl *RD) {
// Check the number of bases.
if (RD->getNumBases() != 1)
return false;
// Get the base.
CXXRecordDecl::base_class_const_iterator Base = RD->bases_begin();
// Check that the base is not virtual.
if (Base->isVirtual())
return false;
// Check that the base is public.
if (Base->getAccessSpecifier() != AS_public)
return false;
// Check that the class is dynamic iff the base is.
const CXXRecordDecl *BaseDecl =
cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
if (!BaseDecl->isEmpty() &&
BaseDecl->isDynamicClass() != RD->isDynamicClass())
return false;
return true;
}
void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
// abi::__class_type_info.
static const char * const ClassTypeInfo =
"_ZTVN10__cxxabiv117__class_type_infoE";
// abi::__si_class_type_info.
static const char * const SIClassTypeInfo =
"_ZTVN10__cxxabiv120__si_class_type_infoE";
// abi::__vmi_class_type_info.
static const char * const VMIClassTypeInfo =
"_ZTVN10__cxxabiv121__vmi_class_type_infoE";
const char *VTableName = nullptr;
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.def"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
case Type::LValueReference:
case Type::RValueReference:
llvm_unreachable("References shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe types shouldn't get here");
case Type::Builtin:
// GCC treats vector and complex types as fundamental types.
case Type::Vector:
case Type::ExtVector:
case Type::Complex:
case Type::Atomic:
// FIXME: GCC treats block pointers as fundamental types?!
case Type::BlockPointer:
// abi::__fundamental_type_info.
VTableName = "_ZTVN10__cxxabiv123__fundamental_type_infoE";
break;
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
// abi::__array_type_info.
VTableName = "_ZTVN10__cxxabiv117__array_type_infoE";
break;
case Type::FunctionNoProto:
case Type::FunctionProto:
// abi::__function_type_info.
VTableName = "_ZTVN10__cxxabiv120__function_type_infoE";
break;
case Type::Enum:
// abi::__enum_type_info.
VTableName = "_ZTVN10__cxxabiv116__enum_type_infoE";
break;
case Type::Record: {
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl());
if (!RD->hasDefinition() || !RD->getNumBases()) {
VTableName = ClassTypeInfo;
} else if (CanUseSingleInheritance(RD)) {
VTableName = SIClassTypeInfo;
} else {
VTableName = VMIClassTypeInfo;
}
break;
}
case Type::ObjCObject:
// Ignore protocol qualifiers.
Ty = cast<ObjCObjectType>(Ty)->getBaseType().getTypePtr();
// Handle id and Class.
if (isa<BuiltinType>(Ty)) {
VTableName = ClassTypeInfo;
break;
}
assert(isa<ObjCInterfaceType>(Ty));
// Fall through.
case Type::ObjCInterface:
if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) {
VTableName = SIClassTypeInfo;
} else {
VTableName = ClassTypeInfo;
}
break;
case Type::ObjCObjectPointer:
case Type::Pointer:
// abi::__pointer_type_info.
VTableName = "_ZTVN10__cxxabiv119__pointer_type_infoE";
break;
case Type::MemberPointer:
// abi::__pointer_to_member_type_info.
VTableName = "_ZTVN10__cxxabiv129__pointer_to_member_type_infoE";
break;
}
llvm::Constant *VTable =
CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy);
llvm::Type *PtrDiffTy =
CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType());
// The vtable address point is at index 2, past the offset-to-top and RTTI
// slots.
llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
VTable =
llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two);
VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy);
Fields.push_back(VTable);
}
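// Example: for a hypothetical class with no bases, the emitted type_info
// object begins with a pointer to _ZTVN10__cxxabiv117__class_type_infoE
// advanced by two pointer-sized slots, i.e. the vtable address point past
// the offset-to-top and RTTI fields, exactly as computed above.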
/// \brief Return the linkage that the type info and type info name constants
/// should have for the given type.
static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
QualType Ty) {
// Itanium C++ ABI 2.9.5p7:
// In addition, it and all of the intermediate abi::__pointer_type_info
// structs in the chain down to the abi::__class_type_info for the
// incomplete class type must be prevented from resolving to the
// corresponding type_info structs for the complete class type, possibly
// by making them local static objects. Finally, a dummy class RTTI is
// generated for the incomplete type that will not resolve to the final
// complete class RTTI (because the latter need not exist), possibly by
// making it a local static object.
if (ContainsIncompleteClassType(Ty))
return llvm::GlobalValue::InternalLinkage;
switch (Ty->getLinkage()) {
case NoLinkage:
case InternalLinkage:
case UniqueExternalLinkage:
return llvm::GlobalValue::InternalLinkage;
case VisibleNoLinkage:
case ModuleInternalLinkage:
case ModuleLinkage:
case ExternalLinkage:
// If RTTI is not enabled, this type info struct is only going to be used
// for exception handling. Give it linkonce_odr linkage.
if (!CGM.getLangOpts().RTTI)
return llvm::GlobalValue::LinkOnceODRLinkage;
if (const RecordType *Record = dyn_cast<RecordType>(Ty)) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl());
if (RD->hasAttr<WeakAttr>())
return llvm::GlobalValue::WeakODRLinkage;
if (CGM.getTriple().isWindowsItaniumEnvironment())
if (RD->hasAttr<DLLImportAttr>() &&
ShouldUseExternalRTTIDescriptor(CGM, Ty))
return llvm::GlobalValue::ExternalLinkage;
if (RD->isDynamicClass()) {
llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD);
// MinGW won't export the RTTI information when there is a key function.
// Make sure we emit our own copy instead of attempting to dllimport it.
if (RD->hasAttr<DLLImportAttr>() &&
llvm::GlobalValue::isAvailableExternallyLinkage(LT))
LT = llvm::GlobalValue::LinkOnceODRLinkage;
return LT;
}
}
return llvm::GlobalValue::LinkOnceODRLinkage;
}
llvm_unreachable("Invalid linkage!");
}
llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force,
bool DLLExport) {
// We want to operate on the canonical type.
Ty = Ty.getCanonicalType();
// Check if we've already emitted an RTTI descriptor for this type.
SmallString<256> Name;
llvm::raw_svector_ostream Out(Name);
CGM.getCXXABI().getMangleContext().mangleCXXRTTI(Ty, Out);
llvm::GlobalVariable *OldGV = CGM.getModule().getNamedGlobal(Name);
if (OldGV && !OldGV->isDeclaration()) {
assert(!OldGV->hasAvailableExternallyLinkage() &&
"available_externally typeinfos not yet implemented");
return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy);
}
// Check if there is already an external RTTI descriptor for this type.
bool IsStdLib = IsStandardLibraryRTTIDescriptor(Ty);
if (!Force && (IsStdLib || ShouldUseExternalRTTIDescriptor(CGM, Ty)))
return GetAddrOfExternalRTTIDescriptor(Ty);
// Emit standard-library RTTI descriptors with external linkage.
llvm::GlobalVariable::LinkageTypes Linkage;
if (IsStdLib)
Linkage = llvm::GlobalValue::ExternalLinkage;
else
Linkage = getTypeInfoLinkage(CGM, Ty);
// Add the vtable pointer.
BuildVTablePointer(cast<Type>(Ty));
// And the name.
llvm::GlobalVariable *TypeName = GetAddrOfTypeName(Ty, Linkage);
llvm::Constant *TypeNameField;
// If we're supposed to demote the visibility, be sure to set a flag
// to use a string comparison for type_info comparisons.
ItaniumCXXABI::RTTIUniquenessKind RTTIUniqueness =
CXXABI.classifyRTTIUniqueness(Ty, Linkage);
if (RTTIUniqueness != ItaniumCXXABI::RUK_Unique) {
// The flag is the sign bit, which on ARM64 is defined to be clear
// for global pointers. This is very ARM64-specific.
TypeNameField = llvm::ConstantExpr::getPtrToInt(TypeName, CGM.Int64Ty);
llvm::Constant *flag =
llvm::ConstantInt::get(CGM.Int64Ty, ((uint64_t)1) << 63);
TypeNameField = llvm::ConstantExpr::getAdd(TypeNameField, flag);
TypeNameField =
llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy);
} else {
TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy);
}
Fields.push_back(TypeNameField);
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.def"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
// GCC treats vector types as fundamental types.
case Type::Builtin:
case Type::Vector:
case Type::ExtVector:
case Type::Complex:
case Type::BlockPointer:
// Itanium C++ ABI 2.9.5p4:
// abi::__fundamental_type_info adds no data members to std::type_info.
break;
case Type::LValueReference:
case Type::RValueReference:
llvm_unreachable("References shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
llvm_unreachable("Undeduced type shouldn't get here");
case Type::Pipe:
llvm_unreachable("Pipe type shouldn't get here");
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
// Itanium C++ ABI 2.9.5p5:
// abi::__array_type_info adds no data members to std::type_info.
break;
case Type::FunctionNoProto:
case Type::FunctionProto:
// Itanium C++ ABI 2.9.5p5:
// abi::__function_type_info adds no data members to std::type_info.
break;
case Type::Enum:
// Itanium C++ ABI 2.9.5p5:
// abi::__enum_type_info adds no data members to std::type_info.
break;
case Type::Record: {
const CXXRecordDecl *RD =
cast<CXXRecordDecl>(cast<RecordType>(Ty)->getDecl());
if (!RD->hasDefinition() || !RD->getNumBases()) {
// We don't need to emit any fields.
break;
}
if (CanUseSingleInheritance(RD))
BuildSIClassTypeInfo(RD);
else
BuildVMIClassTypeInfo(RD);
break;
}
case Type::ObjCObject:
case Type::ObjCInterface:
BuildObjCObjectTypeInfo(cast<ObjCObjectType>(Ty));
break;
case Type::ObjCObjectPointer:
BuildPointerTypeInfo(cast<ObjCObjectPointerType>(Ty)->getPointeeType());
break;
case Type::Pointer:
BuildPointerTypeInfo(cast<PointerType>(Ty)->getPointeeType());
break;
case Type::MemberPointer:
BuildPointerToMemberTypeInfo(cast<MemberPointerType>(Ty));
break;
case Type::Atomic:
// No fields, at least for the moment.
break;
}
llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields);
llvm::Module &M = CGM.getModule();
llvm::GlobalVariable *GV =
new llvm::GlobalVariable(M, Init->getType(),
/*Constant=*/true, Linkage, Init, Name);
// If there's already an old global variable, replace it with the new one.
if (OldGV) {
GV->takeName(OldGV);
llvm::Constant *NewPtr =
llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
OldGV->replaceAllUsesWith(NewPtr);
OldGV->eraseFromParent();
}
if (CGM.supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(M.getOrInsertComdat(GV->getName()));
// The Itanium ABI specifies that type_info objects must be globally
// unique, with one exception: if the type is an incomplete class
// type or a (possibly indirect) pointer to one. That exception
// affects the general case of comparing type_info objects produced
// by the typeid operator, which is why the comparison operators on
// std::type_info generally use the type_info name pointers instead
// of the object addresses. However, the language's built-in uses
// of RTTI generally require class types to be complete, even when
// manipulating pointers to those class types. This allows the
// implementation of dynamic_cast to rely on address equality tests,
// which is much faster.
// All of this is to say that it's important that both the type_info
// object and the type_info name be uniqued when weakly emitted.
// Give the type_info object and name the formal visibility of the
// type itself.
llvm::GlobalValue::VisibilityTypes llvmVisibility;
if (llvm::GlobalValue::isLocalLinkage(Linkage))
// If the linkage is local, only default visibility makes sense.
llvmVisibility = llvm::GlobalValue::DefaultVisibility;
else if (RTTIUniqueness == ItaniumCXXABI::RUK_NonUniqueHidden)
llvmVisibility = llvm::GlobalValue::HiddenVisibility;
else
llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility());
TypeName->setVisibility(llvmVisibility);
GV->setVisibility(llvmVisibility);
if (CGM.getTriple().isWindowsItaniumEnvironment()) {
auto RD = Ty->getAsCXXRecordDecl();
if (DLLExport || (RD && RD->hasAttr<DLLExportAttr>())) {
TypeName->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
} else if (RD && RD->hasAttr<DLLImportAttr>() &&
ShouldUseExternalRTTIDescriptor(CGM, Ty)) {
TypeName->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
// Because the typename and the typeinfo are DLL import, convert them to
// declarations rather than definitions. The initializers still need to
// be constructed to calculate the type for the declarations.
TypeName->setInitializer(nullptr);
GV->setInitializer(nullptr);
}
}
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
}
/// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info
/// for the given Objective-C object type.
void ItaniumRTTIBuilder::BuildObjCObjectTypeInfo(const ObjCObjectType *OT) {
// Drop qualifiers.
const Type *T = OT->getBaseType().getTypePtr();
assert(isa<BuiltinType>(T) || isa<ObjCInterfaceType>(T));
// The builtin types are abi::__class_type_infos and don't require
// extra fields.
if (isa<BuiltinType>(T)) return;
ObjCInterfaceDecl *Class = cast<ObjCInterfaceType>(T)->getDecl();
ObjCInterfaceDecl *Super = Class->getSuperClass();
// Root classes are also __class_type_info.
if (!Super) return;
QualType SuperTy = CGM.getContext().getObjCInterfaceType(Super);
// Everything else is single inheritance.
llvm::Constant *BaseTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(SuperTy);
Fields.push_back(BaseTypeInfo);
}
/// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single
/// inheritance, according to the Itanium C++ ABI, 2.9.5p6b.
void ItaniumRTTIBuilder::BuildSIClassTypeInfo(const CXXRecordDecl *RD) {
// Itanium C++ ABI 2.9.5p6b:
// It adds to abi::__class_type_info a single member pointing to the
// type_info structure for the base type,
llvm::Constant *BaseTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(RD->bases_begin()->getType());
Fields.push_back(BaseTypeInfo);
}
namespace {
/// SeenBases - Contains virtual and non-virtual bases seen when traversing
/// a class hierarchy.
struct SeenBases {
llvm::SmallPtrSet<const CXXRecordDecl *, 16> NonVirtualBases;
llvm::SmallPtrSet<const CXXRecordDecl *, 16> VirtualBases;
};
}
/// ComputeVMIClassTypeInfoFlags - Compute the value of the flags member in
/// abi::__vmi_class_type_info.
///
static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base,
SeenBases &Bases) {
unsigned Flags = 0;
const CXXRecordDecl *BaseDecl =
cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
if (Base->isVirtual()) {
// Mark the virtual base as seen.
if (!Bases.VirtualBases.insert(BaseDecl).second) {
// If this virtual base has been seen before, then the class is diamond
// shaped.
Flags |= ItaniumRTTIBuilder::VMI_DiamondShaped;
} else {
if (Bases.NonVirtualBases.count(BaseDecl))
Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
}
} else {
// Mark the non-virtual base as seen.
if (!Bases.NonVirtualBases.insert(BaseDecl).second) {
// If this non-virtual base has been seen before, then the class has non-
// diamond shaped repeated inheritance.
Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
} else {
if (Bases.VirtualBases.count(BaseDecl))
Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
}
}
// Walk all bases.
for (const auto &I : BaseDecl->bases())
Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases);
return Flags;
}
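// A minimal sketch of the computation (hypothetical hierarchy, for
// illustration only):
//   struct A { };
//   struct B : virtual A { };
//   struct C : virtual A { };
//   struct D : B, C { };   // A is reachable twice, both times virtually
// Walking D's bases inserts A into VirtualBases via B; the second insert via
// C fails, so VMI_DiamondShaped is set. Had D also derived from A
// non-virtually, the count() checks would additionally set
// VMI_NonDiamondRepeat.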
static unsigned ComputeVMIClassTypeInfoFlags(const CXXRecordDecl *RD) {
unsigned Flags = 0;
SeenBases Bases;
// Walk all bases.
for (const auto &I : RD->bases())
Flags |= ComputeVMIClassTypeInfoFlags(&I, Bases);
return Flags;
}
/// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for
/// classes with bases that do not satisfy the abi::__si_class_type_info
/// constraints, according to the Itanium C++ ABI, 2.9.5p5c.
void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) {
llvm::Type *UnsignedIntLTy =
CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
// Itanium C++ ABI 2.9.5p6c:
// __flags is a word with flags describing details about the class
// structure, which may be referenced by using the __flags_masks
// enumeration. These flags refer to both direct and indirect bases.
unsigned Flags = ComputeVMIClassTypeInfoFlags(RD);
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags));
// Itanium C++ ABI 2.9.5p6c:
// __base_count is a word with the number of direct proper base class
// descriptions that follow.
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, RD->getNumBases()));
if (!RD->getNumBases())
return;
// Now add the base class descriptions.
// Itanium C++ ABI 2.9.5p6c:
// __base_info[] is an array of base class descriptions -- one for every
// direct proper base. Each description is of the type:
//
// struct abi::__base_class_type_info {
// public:
// const __class_type_info *__base_type;
// long __offset_flags;
//
// enum __offset_flags_masks {
// __virtual_mask = 0x1,
// __public_mask = 0x2,
// __offset_shift = 8
// };
// };
// If we're in mingw and 'long' isn't wide enough for a pointer, use 'long
// long' instead of 'long' for __offset_flags. libstdc++abi uses long long on
// LLP64 platforms.
// FIXME: Consider updating libc++abi to match, and extend this logic to all
// LLP64 platforms.
QualType OffsetFlagsTy = CGM.getContext().LongTy;
const TargetInfo &TI = CGM.getContext().getTargetInfo();
if (TI.getTriple().isOSCygMing() && TI.getPointerWidth(0) > TI.getLongWidth())
OffsetFlagsTy = CGM.getContext().LongLongTy;
llvm::Type *OffsetFlagsLTy =
CGM.getTypes().ConvertType(OffsetFlagsTy);
for (const auto &Base : RD->bases()) {
// The __base_type member points to the RTTI for the base type.
Fields.push_back(ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(Base.getType()));
const CXXRecordDecl *BaseDecl =
cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
int64_t OffsetFlags = 0;
// All but the lower 8 bits of __offset_flags are a signed offset.
// For a non-virtual base, this is the offset in the object of the base
// subobject. For a virtual base, this is the offset in the virtual table of
// the virtual base offset for the virtual base referenced (negative).
CharUnits Offset;
if (Base.isVirtual())
Offset =
CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(RD, BaseDecl);
else {
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
Offset = Layout.getBaseClassOffset(BaseDecl);
}
OffsetFlags = uint64_t(Offset.getQuantity()) << 8;
// The low-order byte of __offset_flags contains flags, as given by the
// masks from the enumeration __offset_flags_masks.
if (Base.isVirtual())
OffsetFlags |= BCTI_Virtual;
if (Base.getAccessSpecifier() == AS_public)
OffsetFlags |= BCTI_Public;
Fields.push_back(llvm::ConstantInt::get(OffsetFlagsLTy, OffsetFlags));
}
}
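// Worked example (illustrative offsets, not taken from the source): for
//   struct A { virtual ~A(); };
//   struct E : public A { };
// the single base description would carry
//   __offset_flags = (0 << 8) | __public_mask;                    // == 0x2
// while a public virtual base whose vbase-offset slot lives at -24 in the
// vtable would instead carry
//   __offset_flags = (-24 << 8) | __virtual_mask | __public_mask;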
/// Compute the flags for a __pbase_type_info, and remove the corresponding
/// pieces from \p Type.
static unsigned extractPBaseFlags(ASTContext &Ctx, QualType &Type) {
unsigned Flags = 0;
if (Type.isConstQualified())
Flags |= ItaniumRTTIBuilder::PTI_Const;
if (Type.isVolatileQualified())
Flags |= ItaniumRTTIBuilder::PTI_Volatile;
if (Type.isRestrictQualified())
Flags |= ItaniumRTTIBuilder::PTI_Restrict;
Type = Type.getUnqualifiedType();
// Itanium C++ ABI 2.9.5p7:
// When the abi::__pbase_type_info is for a direct or indirect pointer to an
// incomplete class type, the incomplete target type flag is set.
if (ContainsIncompleteClassType(Type))
Flags |= ItaniumRTTIBuilder::PTI_Incomplete;
if (auto *Proto = Type->getAs<FunctionProtoType>()) {
if (Proto->isNothrow(Ctx)) {
Flags |= ItaniumRTTIBuilder::PTI_Noexcept;
Type = Ctx.getFunctionType(
Proto->getReturnType(), Proto->getParamTypes(),
Proto->getExtProtoInfo().withExceptionSpec(EST_None));
}
}
return Flags;
}
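// For example (sketch, not from the source): given a pointee type
//   const volatile int
// this returns PTI_Const | PTI_Volatile and strips both qualifiers, so the
// __pointee field emitted below references the type_info for plain 'int'.
// Likewise, for 'void () noexcept' it sets PTI_Noexcept and rewrites the
// prototype without its exception specification.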
/// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct,
/// used for pointer types.
void ItaniumRTTIBuilder::BuildPointerTypeInfo(QualType PointeeTy) {
// Itanium C++ ABI 2.9.5p7:
// __flags is a flag word describing the cv-qualification and other
// attributes of the type pointed to
unsigned Flags = extractPBaseFlags(CGM.getContext(), PointeeTy);
llvm::Type *UnsignedIntLTy =
CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags));
// Itanium C++ ABI 2.9.5p7:
// __pointee is a pointer to the std::type_info derivation for the
// unqualified type being pointed to.
llvm::Constant *PointeeTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(PointeeTy);
Fields.push_back(PointeeTypeInfo);
}
/// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info
/// struct, used for member pointer types.
void
ItaniumRTTIBuilder::BuildPointerToMemberTypeInfo(const MemberPointerType *Ty) {
QualType PointeeTy = Ty->getPointeeType();
// Itanium C++ ABI 2.9.5p7:
// __flags is a flag word describing the cv-qualification and other
// attributes of the type pointed to.
unsigned Flags = extractPBaseFlags(CGM.getContext(), PointeeTy);
const RecordType *ClassType = cast<RecordType>(Ty->getClass());
if (IsIncompleteClassType(ClassType))
Flags |= PTI_ContainingClassIncomplete;
llvm::Type *UnsignedIntLTy =
CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
Fields.push_back(llvm::ConstantInt::get(UnsignedIntLTy, Flags));
// Itanium C++ ABI 2.9.5p7:
// __pointee is a pointer to the std::type_info derivation for the
// unqualified type being pointed to.
llvm::Constant *PointeeTypeInfo =
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(PointeeTy);
Fields.push_back(PointeeTypeInfo);
// Itanium C++ ABI 2.9.5p9:
// __context is a pointer to an abi::__class_type_info corresponding to the
// class type containing the member pointed to
// (e.g., the "A" in "int A::*").
Fields.push_back(
ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(QualType(ClassType, 0)));
}
llvm::Constant *ItaniumCXXABI::getAddrOfRTTIDescriptor(QualType Ty) {
return ItaniumRTTIBuilder(*this).BuildTypeInfo(Ty);
}
void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type,
bool DLLExport) {
QualType PointerType = getContext().getPointerType(Type);
QualType PointerTypeConst = getContext().getPointerType(Type.withConst());
ItaniumRTTIBuilder(*this).BuildTypeInfo(Type, /*Force=*/true, DLLExport);
ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerType, /*Force=*/true,
DLLExport);
ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, /*Force=*/true,
DLLExport);
}
void ItaniumCXXABI::EmitFundamentalRTTIDescriptors(bool DLLExport) {
// Types added here must also be added to TypeInfoIsInStandardLibrary.
QualType FundamentalTypes[] = {
getContext().VoidTy, getContext().NullPtrTy,
getContext().BoolTy, getContext().WCharTy,
getContext().CharTy, getContext().UnsignedCharTy,
getContext().SignedCharTy, getContext().ShortTy,
getContext().UnsignedShortTy, getContext().IntTy,
getContext().UnsignedIntTy, getContext().LongTy,
getContext().UnsignedLongTy, getContext().LongLongTy,
getContext().UnsignedLongLongTy, getContext().Int128Ty,
getContext().UnsignedInt128Ty, getContext().HalfTy,
getContext().FloatTy, getContext().DoubleTy,
getContext().LongDoubleTy, getContext().Float128Ty,
getContext().Char16Ty, getContext().Char32Ty
};
for (const QualType &FundamentalType : FundamentalTypes)
EmitFundamentalRTTIDescriptor(FundamentalType, DLLExport);
}
/// What sort of uniqueness rules should we use for the RTTI for the
/// given type?
ItaniumCXXABI::RTTIUniquenessKind ItaniumCXXABI::classifyRTTIUniqueness(
QualType CanTy, llvm::GlobalValue::LinkageTypes Linkage) const {
if (shouldRTTIBeUnique())
return RUK_Unique;
// It's only necessary for linkonce_odr or weak_odr linkage.
if (Linkage != llvm::GlobalValue::LinkOnceODRLinkage &&
Linkage != llvm::GlobalValue::WeakODRLinkage)
return RUK_Unique;
// It's only necessary with default visibility.
if (CanTy->getVisibility() != DefaultVisibility)
return RUK_Unique;
// If we're not required to publish this symbol, hide it.
if (Linkage == llvm::GlobalValue::LinkOnceODRLinkage)
return RUK_NonUniqueHidden;
// If we're required to publish this symbol, as we might be under an
// explicit instantiation, leave it with default visibility but
// enable string-comparisons.
assert(Linkage == llvm::GlobalValue::WeakODRLinkage);
return RUK_NonUniqueVisible;
}
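// Illustrative sketch of what the non-unique kinds imply at run time
// (assumed libc++-style logic, not from this file): because BuildTypeInfo
// sets the sign bit of the name pointer for RUK_NonUnique* descriptors,
// equality can no longer rely on address identity:
//   if ((uintptr_t)lhs.__name & kNonUniqueBit)     // hypothetical mask
//     return strcmp(lhs.name(), rhs.name()) == 0;  // string comparison
//   return &lhs == &rhs;                           // unique case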
// Find out how to codegen the complete destructor and constructor
namespace {
enum class StructorCodegen { Emit, RAUW, Alias, COMDAT };
}
static StructorCodegen getCodegenToUse(CodeGenModule &CGM,
const CXXMethodDecl *MD) {
if (!CGM.getCodeGenOpts().CXXCtorDtorAliases)
return StructorCodegen::Emit;
// The complete and base structors are not equivalent if there are any virtual
// bases, so emit separate functions.
if (MD->getParent()->getNumVBases())
return StructorCodegen::Emit;
GlobalDecl AliasDecl;
if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) {
AliasDecl = GlobalDecl(DD, Dtor_Complete);
} else {
const auto *CD = cast<CXXConstructorDecl>(MD);
AliasDecl = GlobalDecl(CD, Ctor_Complete);
}
llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(AliasDecl);
if (llvm::GlobalValue::isDiscardableIfUnused(Linkage))
return StructorCodegen::RAUW;
// FIXME: Should we allow available_externally aliases?
if (!llvm::GlobalAlias::isValidLinkage(Linkage))
return StructorCodegen::RAUW;
if (llvm::GlobalValue::isWeakForLinker(Linkage)) {
// Only ELF and wasm support COMDATs with arbitrary names (C5/D5).
if (CGM.getTarget().getTriple().isOSBinFormatELF() ||
CGM.getTarget().getTriple().isOSBinFormatWasm())
return StructorCodegen::COMDAT;
return StructorCodegen::Emit;
}
return StructorCodegen::Alias;
}
static void emitConstructorDestructorAlias(CodeGenModule &CGM,
GlobalDecl AliasDecl,
GlobalDecl TargetDecl) {
llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(AliasDecl);
StringRef MangledName = CGM.getMangledName(AliasDecl);
llvm::GlobalValue *Entry = CGM.GetGlobalValue(MangledName);
if (Entry && !Entry->isDeclaration())
return;
auto *Aliasee = cast<llvm::GlobalValue>(CGM.GetAddrOfGlobal(TargetDecl));
// Create the alias with no name.
auto *Alias = llvm::GlobalAlias::create(Linkage, "", Aliasee);
// Switch any previous uses to the alias.
if (Entry) {
assert(Entry->getType() == Aliasee->getType() &&
"declaration exists with different type");
Alias->takeName(Entry);
Entry->replaceAllUsesWith(Alias);
Entry->eraseFromParent();
} else {
Alias->setName(MangledName);
}
// Finally, set up the alias with its proper name and attributes.
CGM.setAliasAttributes(cast<NamedDecl>(AliasDecl.getDecl()), Alias);
}
void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
StructorType Type) {
auto *CD = dyn_cast<CXXConstructorDecl>(MD);
const CXXDestructorDecl *DD = CD ? nullptr : cast<CXXDestructorDecl>(MD);
StructorCodegen CGType = getCodegenToUse(CGM, MD);
if (Type == StructorType::Complete) {
GlobalDecl CompleteDecl;
GlobalDecl BaseDecl;
if (CD) {
CompleteDecl = GlobalDecl(CD, Ctor_Complete);
BaseDecl = GlobalDecl(CD, Ctor_Base);
} else {
CompleteDecl = GlobalDecl(DD, Dtor_Complete);
BaseDecl = GlobalDecl(DD, Dtor_Base);
}
if (CGType == StructorCodegen::Alias || CGType == StructorCodegen::COMDAT) {
emitConstructorDestructorAlias(CGM, CompleteDecl, BaseDecl);
return;
}
if (CGType == StructorCodegen::RAUW) {
StringRef MangledName = CGM.getMangledName(CompleteDecl);
auto *Aliasee = CGM.GetAddrOfGlobal(BaseDecl);
CGM.addReplacement(MangledName, Aliasee);
return;
}
}
// The base destructor is equivalent to the base destructor of its
// base class if there is exactly one non-virtual base class with a
// non-trivial destructor, there are no fields with a non-trivial
// destructor, and the body of the destructor is trivial.
if (DD && Type == StructorType::Base && CGType != StructorCodegen::COMDAT &&
!CGM.TryEmitBaseDestructorAsAlias(DD))
return;
llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type);
if (CGType == StructorCodegen::COMDAT) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
if (DD)
getMangleContext().mangleCXXDtorComdat(DD, Out);
else
getMangleContext().mangleCXXCtorComdat(CD, Out);
llvm::Comdat *C = CGM.getModule().getOrInsertComdat(Out.str());
Fn->setComdat(C);
} else {
CGM.maybeSetTrivialComdat(*MD, *Fn);
}
}
static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) {
// void *__cxa_begin_catch(void*);
llvm::FunctionType *FTy = llvm::FunctionType::get(
CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch");
}
static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) {
// void __cxa_end_catch();
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch");
}
static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) {
// void *__cxa_get_exception_ptr(void*);
llvm::FunctionType *FTy = llvm::FunctionType::get(
CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr");
}
namespace {
/// A cleanup to call __cxa_end_catch. In many cases, the caught
/// exception type lets us state definitively that the thrown exception
/// type does not have a destructor. In particular:
/// - Catch-alls tell us nothing, so we have to conservatively
/// assume that the thrown exception might have a destructor.
/// - Catches by reference behave according to their base types.
/// - Catches of non-record types will only trigger for exceptions
/// of non-record types, which never have destructors.
/// - Catches of record types can trigger for arbitrary subclasses
/// of the caught type, so we have to assume the actual thrown
/// exception type might have a throwing destructor, even if the
/// caught type's destructor is trivial or nothrow.
struct CallEndCatch final : EHScopeStack::Cleanup {
CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {}
bool MightThrow;
void Emit(CodeGenFunction &CGF, Flags flags) override {
if (!MightThrow) {
CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM));
return;
}
CGF.EmitRuntimeCallOrInvoke(getEndCatchFn(CGF.CGM));
}
};
}
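// Concretely (illustration, not from the source), the rules above give:
//   catch (int)                -> non-record type, MightThrow = false
//   catch (std::exception &e)  -> record base type, MightThrow = true
//   catch (...)                -> catch-all, conservatively MightThrow = true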
/// Emits a call to __cxa_begin_catch and enters a cleanup to call
/// __cxa_end_catch.
///
/// \param EndMightThrow - true if __cxa_end_catch might throw
static llvm::Value *CallBeginCatch(CodeGenFunction &CGF,
llvm::Value *Exn,
bool EndMightThrow) {
llvm::CallInst *call =
CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn);
CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow);
return call;
}
/// A "special initializer" callback for initializing a catch
/// parameter during catch initialization.
static void InitCatchParam(CodeGenFunction &CGF,
const VarDecl &CatchParam,
Address ParamAddr,
SourceLocation Loc) {
// Load the exception from where the landing pad saved it.
llvm::Value *Exn = CGF.getExceptionFromSlot();
CanQualType CatchType =
CGF.CGM.getContext().getCanonicalType(CatchParam.getType());
llvm::Type *LLVMCatchTy = CGF.ConvertTypeForMem(CatchType);
// If we're catching by reference, we can just cast the object
// pointer to the appropriate pointer.
if (isa<ReferenceType>(CatchType)) {
QualType CaughtType = cast<ReferenceType>(CatchType)->getPointeeType();
bool EndCatchMightThrow = CaughtType->isRecordType();
// __cxa_begin_catch returns the adjusted object pointer.
llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, EndCatchMightThrow);
// We have no way to tell the personality function that we're
// catching by reference, so if we're catching a pointer,
// __cxa_begin_catch will actually return that pointer by value.
if (const PointerType *PT = dyn_cast<PointerType>(CaughtType)) {
QualType PointeeType = PT->getPointeeType();
// When catching by reference, generally we should just ignore
// this by-value pointer and use the exception object instead.
if (!PointeeType->isRecordType()) {
// Exn points to the struct _Unwind_Exception header, which
// we have to skip past in order to reach the exception data.
unsigned HeaderSize =
CGF.CGM.getTargetCodeGenInfo().getSizeOfUnwindException();
AdjustedExn = CGF.Builder.CreateConstGEP1_32(Exn, HeaderSize);
// However, if we're catching a pointer-to-record type, that won't
// work, because the personality function might have adjusted
// the pointer. There's actually no way for us to fully satisfy
// the language/ABI contract here: we can't use Exn because it
// might have the wrong adjustment, but we can't use the by-value
// pointer because it's off by a level of abstraction.
//
// The current solution is to dump the adjusted pointer into an
// alloca, which breaks language semantics (because changing the
// pointer doesn't change the exception) but at least works.
// The better solution would be to filter out non-exact matches
// and rethrow them, but this is tricky because the rethrow
// really needs to be catchable by other sites at this landing
// pad. The best solution is to fix the personality function.
} else {
// Pull the pointer for the reference type off.
llvm::Type *PtrTy =
cast<llvm::PointerType>(LLVMCatchTy)->getElementType();
// Create the temporary and write the adjusted pointer into it.
Address ExnPtrTmp =
CGF.CreateTempAlloca(PtrTy, CGF.getPointerAlign(), "exn.byref.tmp");
llvm::Value *Casted = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
CGF.Builder.CreateStore(Casted, ExnPtrTmp);
// Bind the reference to the temporary.
AdjustedExn = ExnPtrTmp.getPointer();
}
}
llvm::Value *ExnCast =
CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.byref");
CGF.Builder.CreateStore(ExnCast, ParamAddr);
return;
}
// Scalars and complexes.
TypeEvaluationKind TEK = CGF.getEvaluationKind(CatchType);
if (TEK != TEK_Aggregate) {
llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, false);
// If the catch type is a pointer type, __cxa_begin_catch returns
// the pointer by value.
if (CatchType->hasPointerRepresentation()) {
llvm::Value *CastExn =
CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.casted");
switch (CatchType.getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
CastExn = CGF.EmitARCRetainNonBlock(CastExn);
// fallthrough
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Autoreleasing:
CGF.Builder.CreateStore(CastExn, ParamAddr);
return;
case Qualifiers::OCL_Weak:
CGF.EmitARCInitWeak(ParamAddr, CastExn);
return;
}
llvm_unreachable("bad ownership qualifier!");
}
// Otherwise, it returns a pointer into the exception object.
llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
llvm::Value *Cast = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
LValue srcLV = CGF.MakeNaturalAlignAddrLValue(Cast, CatchType);
LValue destLV = CGF.MakeAddrLValue(ParamAddr, CatchType);
switch (TEK) {
case TEK_Complex:
CGF.EmitStoreOfComplex(CGF.EmitLoadOfComplex(srcLV, Loc), destLV,
/*init*/ true);
return;
case TEK_Scalar: {
llvm::Value *ExnLoad = CGF.EmitLoadOfScalar(srcLV, Loc);
CGF.EmitStoreOfScalar(ExnLoad, destLV, /*init*/ true);
return;
}
case TEK_Aggregate:
llvm_unreachable("evaluation kind filtered out!");
}
llvm_unreachable("bad evaluation kind");
}
assert(isa<RecordType>(CatchType) && "unexpected catch type!");
auto catchRD = CatchType->getAsCXXRecordDecl();
CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD);
llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
// Check for a copy expression. If we don't have a copy expression,
// that means a trivial copy is okay.
const Expr *copyExpr = CatchParam.getInit();
if (!copyExpr) {
llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
caughtExnAlignment);
CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType);
return;
}
// We have to call __cxa_get_exception_ptr to get the adjusted
// pointer before copying.
llvm::CallInst *rawAdjustedExn =
CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
// Cast that to the appropriate type.
Address adjustedExn(CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy),
caughtExnAlignment);
// The copy expression is defined in terms of an OpaqueValueExpr.
// Find it and map it to the adjusted expression.
CodeGenFunction::OpaqueValueMapping
opaque(CGF, OpaqueValueExpr::findInCopyConstruct(copyExpr),
CGF.MakeAddrLValue(adjustedExn, CatchParam.getType()));
// Call the copy ctor in a terminate scope.
CGF.EHStack.pushTerminate();
// Perform the copy construction.
CGF.EmitAggExpr(copyExpr,
AggValueSlot::forAddr(ParamAddr, Qualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased));
// Leave the terminate scope.
CGF.EHStack.popTerminate();
// Undo the opaque value mapping.
opaque.pop();
// Finally we can call __cxa_begin_catch.
CallBeginCatch(CGF, Exn, true);
}
/// Begins a catch statement by initializing the catch variable and
/// calling __cxa_begin_catch.
void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
const CXXCatchStmt *S) {
// We have to be very careful with the ordering of cleanups here:
// C++ [except.throw]p4:
// The destruction [of the exception temporary] occurs
// immediately after the destruction of the object declared in
// the exception-declaration in the handler.
//
// So the precise ordering is:
// 1. Construct catch variable.
// 2. __cxa_begin_catch
// 3. Enter __cxa_end_catch cleanup
// 4. Enter dtor cleanup
//
// We do this by using a slightly abnormal initialization process.
// Delegation sequence:
// - ExitCXXTryStmt opens a RunCleanupsScope
// - EmitAutoVarAlloca creates the variable and debug info
// - InitCatchParam initializes the variable from the exception
// - CallBeginCatch calls __cxa_begin_catch
// - CallBeginCatch enters the __cxa_end_catch cleanup
// - EmitAutoVarCleanups enters the variable destructor cleanup
// - EmitCXXTryStmt emits the code for the catch body
// - EmitCXXTryStmt closes the RunCleanupsScope
VarDecl *CatchParam = S->getExceptionDecl();
if (!CatchParam) {
llvm::Value *Exn = CGF.getExceptionFromSlot();
CallBeginCatch(CGF, Exn, true);
return;
}
// Emit the local.
CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getLocStart());
CGF.EmitAutoVarCleanups(var);
}
/// Get or define the following function:
/// void @__clang_call_terminate(i8* %exn) nounwind noreturn
/// This code is used only in C++.
static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
llvm::FunctionType *fnTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
llvm::Constant *fnRef = CGM.CreateRuntimeFunction(
fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
if (fn && fn->empty()) {
fn->setDoesNotThrow();
fn->setDoesNotReturn();
// What we really want is to massively penalize inlining without
// forbidding it completely. The difference between that and
// 'noinline' is negligible.
fn->addFnAttr(llvm::Attribute::NoInline);
// Allow this function to be shared across translation units, but
// we don't want it to turn into an exported symbol.
fn->setLinkage(llvm::Function::LinkOnceODRLinkage);
fn->setVisibility(llvm::Function::HiddenVisibility);
if (CGM.supportsCOMDAT())
fn->setComdat(CGM.getModule().getOrInsertComdat(fn->getName()));
// Set up the function.
llvm::BasicBlock *entry =
llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn);
CGBuilderTy builder(CGM, entry);
// Pull the exception pointer out of the parameter list.
llvm::Value *exn = &*fn->arg_begin();
// Call __cxa_begin_catch(exn).
llvm::CallInst *catchCall = builder.CreateCall(getBeginCatchFn(CGM), exn);
catchCall->setDoesNotThrow();
catchCall->setCallingConv(CGM.getRuntimeCC());
// Call std::terminate().
llvm::CallInst *termCall = builder.CreateCall(CGM.getTerminateFn());
termCall->setDoesNotThrow();
termCall->setDoesNotReturn();
termCall->setCallingConv(CGM.getRuntimeCC());
// std::terminate cannot return.
builder.CreateUnreachable();
}
return fnRef;
}
llvm::CallInst *
ItaniumCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF,
llvm::Value *Exn) {
// In C++, we want to call __cxa_begin_catch() before terminating.
if (Exn) {
assert(CGF.CGM.getLangOpts().CPlusPlus);
return CGF.EmitNounwindRuntimeCall(getClangCallTerminateFn(CGF.CGM), Exn);
}
return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
}
Index: head/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp (revision 322855)
@@ -1,4245 +1,4243 @@
//===--- MicrosoftCXXABI.cpp - Emit LLVM Code from ASTs for a Module ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides C++ code generation targeting the Microsoft Visual C++ ABI.
// The class in this file generates structures that follow the Microsoft
// Visual C++ ABI, which is not well documented outside of Microsoft.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGVTables.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
#include "TargetInfo.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/VTableBuilder.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
using namespace clang;
using namespace CodeGen;
namespace {
/// Holds all the vbtable globals for a given class.
struct VBTableGlobals {
const VPtrInfoVector *VBTables;
SmallVector<llvm::GlobalVariable *, 2> Globals;
};
class MicrosoftCXXABI : public CGCXXABI {
public:
MicrosoftCXXABI(CodeGenModule &CGM)
: CGCXXABI(CGM), BaseClassDescriptorType(nullptr),
ClassHierarchyDescriptorType(nullptr),
CompleteObjectLocatorType(nullptr), CatchableTypeType(nullptr),
ThrowInfoType(nullptr) {}
bool HasThisReturn(GlobalDecl GD) const override;
bool hasMostDerivedReturn(GlobalDecl GD) const override;
bool classifyReturnType(CGFunctionInfo &FI) const override;
RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override;
bool isSRetParameterAfterThis() const override { return true; }
bool isThisCompleteObject(GlobalDecl GD) const override {
// The Microsoft ABI doesn't use separate complete-object vs.
// base-object variants of constructors, but it does of destructors.
if (isa<CXXDestructorDecl>(GD.getDecl())) {
switch (GD.getDtorType()) {
case Dtor_Complete:
case Dtor_Deleting:
return true;
case Dtor_Base:
return false;
case Dtor_Comdat: llvm_unreachable("emitting dtor comdat as function?");
}
llvm_unreachable("bad dtor kind");
}
// No other kinds.
return false;
}
size_t getSrcArgforCopyCtor(const CXXConstructorDecl *CD,
FunctionArgList &Args) const override {
assert(Args.size() >= 2 &&
"expected the arglist to have at least two args!");
// The 'most_derived' parameter goes second if the ctor is variadic and
// has v-bases.
if (CD->getParent()->getNumVBases() > 0 &&
CD->getType()->castAs<FunctionProtoType>()->isVariadic())
return 2;
return 1;
}
std::vector<CharUnits> getVBPtrOffsets(const CXXRecordDecl *RD) override {
std::vector<CharUnits> VBPtrOffsets;
const ASTContext &Context = getContext();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
for (const std::unique_ptr<VPtrInfo> &VBT : *VBGlobals.VBTables) {
const ASTRecordLayout &SubobjectLayout =
Context.getASTRecordLayout(VBT->IntroducingObject);
CharUnits Offs = VBT->NonVirtualOffset;
Offs += SubobjectLayout.getVBPtrOffset();
if (VBT->getVBaseWithVPtr())
Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr());
VBPtrOffsets.push_back(Offs);
}
llvm::array_pod_sort(VBPtrOffsets.begin(), VBPtrOffsets.end());
return VBPtrOffsets;
}
StringRef GetPureVirtualCallName() override { return "_purecall"; }
StringRef GetDeletedVirtualCallName() override { return "_purecall"; }
void emitVirtualObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE,
Address Ptr, QualType ElementType,
const CXXDestructorDecl *Dtor) override;
void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override;
void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
llvm::GlobalVariable *getMSCompleteObjectLocator(const CXXRecordDecl *RD,
const VPtrInfo &Info);
llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
CatchTypeInfo
getAddrOfCXXCatchHandlerType(QualType Ty, QualType CatchHandlerType) override;
/// MSVC needs an extra flag to indicate a catchall.
CatchTypeInfo getCatchAllTypeInfo() override {
return CatchTypeInfo{nullptr, 0x40};
}
bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
void EmitBadTypeidCall(CodeGenFunction &CGF) override;
llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) override;
bool shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) override;
llvm::Value *EmitDynamicCastCall(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy, QualType DestTy,
QualType DestRecordTy,
llvm::BasicBlock *CastEnd) override;
llvm::Value *EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) override;
bool EmitBadCastCall(CodeGenFunction &CGF) override;
bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override {
return false;
}
llvm::Value *
GetVirtualBaseClassOffset(CodeGenFunction &CGF, Address This,
const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) override;
llvm::BasicBlock *
EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
const CXXRecordDecl *RD) override;
llvm::BasicBlock *
EmitDtorCompleteObjectHandler(CodeGenFunction &CGF);
void initializeHiddenVirtualInheritanceMembers(CodeGenFunction &CGF,
const CXXRecordDecl *RD) override;
void EmitCXXConstructors(const CXXConstructorDecl *D) override;
// Background on MSVC destructors
// ==============================
//
// Both Itanium and MSVC ABIs have destructor variants. The variant names
// roughly correspond in the following way:
// Itanium Microsoft
// Base -> no name, just ~Class
// Complete -> vbase destructor
// Deleting -> scalar deleting destructor
// vector deleting destructor
//
// The base and complete destructors are the same as in Itanium, although the
// complete destructor does not accept a VTT parameter when there are virtual
// bases. A separate mechanism involving vtordisps is used to ensure that
// virtual methods of destroyed subobjects are not called.
//
// The deleting destructors accept an i32 bitfield as a second parameter. Bit
// 1 indicates if the memory should be deleted. Bit 2 indicates if the this
// pointer points to an array. The scalar deleting destructor assumes that
// bit 2 is zero, and therefore does not contain a loop.
//
// For virtual destructors, only one entry is reserved in the vftable, and it
// always points to the vector deleting destructor. The vector deleting
// destructor is the most general, so it can be used to destroy objects in
// place, delete single heap objects, or delete arrays.
//
// A TU defining a non-inline destructor is only guaranteed to emit a base
// destructor, and all of the other variants are emitted on an as-needed basis
// in COMDATs. Because a non-base destructor can be emitted in a TU that
// lacks a definition for the destructor, non-base destructors must always
// delegate to or alias the base destructor.
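// For example (illustrative, not from the source), for
//   struct S { virtual ~S(); };
//   delete p;     // scalar deleting dtor, implicit flags argument = 1
//   delete[] q;   // vector deleting dtor, implicit flags argument = 3
// where bit 1 requests the operator delete call and bit 2 marks an array
// 'this'.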
AddedStructorArgs
buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) override;
/// Non-base dtors should be emitted as delegating thunks in this ABI.
bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor,
CXXDtorType DT) const override {
return DT != Dtor_Base;
}
void EmitCXXDestructors(const CXXDestructorDecl *D) override;
const CXXRecordDecl *
getThisArgumentTypeForMethod(const CXXMethodDecl *MD) override {
MD = MD->getCanonicalDecl();
if (MD->isVirtual() && !isa<CXXDestructorDecl>(MD)) {
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(MD);
// The vbases might be ordered differently in the final overrider object
// and the complete object, so the "this" argument may sometimes point to
// memory that has no particular type (e.g. past the complete object).
// In this case, we just use a generic pointer type.
// FIXME: might want to have a more precise type in the non-virtual
// multiple inheritance case.
if (ML.VBase || !ML.VFPtrOffset.isZero())
return nullptr;
}
return MD->getParent();
}
Address
adjustThisArgumentForVirtualFunctionCall(CodeGenFunction &CGF, GlobalDecl GD,
Address This,
bool VirtualCall) override;
void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
FunctionArgList &Params) override;
llvm::Value *adjustThisParameterInVirtualFunctionPrologue(
CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) override;
void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override;
AddedStructorArgs
addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D,
CXXCtorType Type, bool ForVirtualBase,
bool Delegating, CallArgList &Args) override;
void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) override;
void emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable);
void emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) override;
bool isVirtualOffsetNeededForVTableField(CodeGenFunction &CGF,
CodeGenFunction::VPtr Vptr) override;
/// Don't initialize vptrs if dynamic class
/// is marked with the 'novtable' attribute.
bool doStructorsInitializeVPtrs(const CXXRecordDecl *VTableClass) override {
return !VTableClass->hasAttr<MSNoVTableAttr>();
}
llvm::Constant *
getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::Value *getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass,
BaseSubobject Base, const CXXRecordDecl *NearestVBase) override;
llvm::Constant *
getVTableAddressPointForConstExpr(BaseSubobject Base,
const CXXRecordDecl *VTableClass) override;
llvm::GlobalVariable *getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) override;
CGCallee getVirtualFunctionPointer(CodeGenFunction &CGF, GlobalDecl GD,
Address This, llvm::Type *Ty,
SourceLocation Loc) override;
llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType,
Address This,
const CXXMemberCallExpr *CE) override;
void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
CallArgList &CallArgs) override {
assert(GD.getDtorType() == Dtor_Deleting &&
"Only deleting destructor thunks are available in this ABI");
CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)),
getContext().IntTy);
}
void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
llvm::GlobalVariable *
getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
llvm::GlobalVariable::LinkageTypes Linkage);
llvm::GlobalVariable *
getAddrOfVirtualDisplacementMap(const CXXRecordDecl *SrcRD,
const CXXRecordDecl *DstRD) {
SmallString<256> OutName;
llvm::raw_svector_ostream Out(OutName);
getMangleContext().mangleCXXVirtualDisplacementMap(SrcRD, DstRD, Out);
StringRef MangledName = OutName.str();
if (auto *VDispMap = CGM.getModule().getNamedGlobal(MangledName))
return VDispMap;
MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext();
unsigned NumEntries = 1 + SrcRD->getNumVBases();
SmallVector<llvm::Constant *, 4> Map(NumEntries,
llvm::UndefValue::get(CGM.IntTy));
Map[0] = llvm::ConstantInt::get(CGM.IntTy, 0);
bool AnyDifferent = false;
for (const auto &I : SrcRD->vbases()) {
const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl();
if (!DstRD->isVirtuallyDerivedFrom(VBase))
continue;
unsigned SrcVBIndex = VTContext.getVBTableIndex(SrcRD, VBase);
unsigned DstVBIndex = VTContext.getVBTableIndex(DstRD, VBase);
Map[SrcVBIndex] = llvm::ConstantInt::get(CGM.IntTy, DstVBIndex * 4);
AnyDifferent |= SrcVBIndex != DstVBIndex;
}
// This map would be useless; don't use it.
if (!AnyDifferent)
return nullptr;
llvm::ArrayType *VDispMapTy = llvm::ArrayType::get(CGM.IntTy, Map.size());
llvm::Constant *Init = llvm::ConstantArray::get(VDispMapTy, Map);
llvm::GlobalValue::LinkageTypes Linkage =
SrcRD->isExternallyVisible() && DstRD->isExternallyVisible()
? llvm::GlobalValue::LinkOnceODRLinkage
: llvm::GlobalValue::InternalLinkage;
auto *VDispMap = new llvm::GlobalVariable(
CGM.getModule(), VDispMapTy, /*Constant=*/true, Linkage,
/*Initializer=*/Init, MangledName);
return VDispMap;
}
void emitVBTableDefinition(const VPtrInfo &VBT, const CXXRecordDecl *RD,
llvm::GlobalVariable *GV) const;
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable,
GlobalDecl GD, bool ReturnAdjustment) override {
// Never dllimport/dllexport thunks.
Thunk->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
GVALinkage Linkage =
getContext().GetGVALinkageForFunction(cast<FunctionDecl>(GD.getDecl()));
if (Linkage == GVA_Internal)
Thunk->setLinkage(llvm::GlobalValue::InternalLinkage);
else if (ReturnAdjustment)
Thunk->setLinkage(llvm::GlobalValue::WeakODRLinkage);
else
Thunk->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
}
llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This,
const ThisAdjustment &TA) override;
llvm::Value *performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) override;
void EmitThreadLocalInitFuncs(
CodeGenModule &CGM, ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
bool usesThreadWrapperFunction() const override { return false; }
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *DeclPtr,
bool PerformInit) override;
void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Dtor, llvm::Constant *Addr) override;
// ==== Notes on array cookies =========
//
// MSVC seems to only use cookies when the class has a destructor; a
// two-argument usual array deallocation function isn't sufficient.
//
// For example, this code prints "100" and "1":
// struct A {
// char x;
// void *operator new[](size_t sz) {
// printf("%u\n", sz);
// return malloc(sz);
// }
// void operator delete[](void *p, size_t sz) {
// printf("%u\n", sz);
// free(p);
// }
// };
// int main() {
// A *p = new A[100];
// delete[] p;
// }
// Whereas it prints "104" and "104" if you give A a destructor.
bool requiresArrayCookie(const CXXDeleteExpr *expr,
QualType elementType) override;
bool requiresArrayCookie(const CXXNewExpr *expr) override;
CharUnits getArrayCookieSizeImpl(QualType type) override;
Address InitializeArrayCookie(CodeGenFunction &CGF,
Address NewPtr,
llvm::Value *NumElements,
const CXXNewExpr *expr,
QualType ElementType) override;
llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) override;
friend struct MSRTTIBuilder;
bool isImageRelative() const {
return CGM.getTarget().getPointerWidth(/*AddressSpace=*/0) == 64;
}
// 5 routines for constructing the llvm types for MS RTTI structs.
llvm::StructType *getTypeDescriptorType(StringRef TypeInfoString) {
llvm::SmallString<32> TDTypeName("rtti.TypeDescriptor");
TDTypeName += llvm::utostr(TypeInfoString.size());
llvm::StructType *&TypeDescriptorType =
TypeDescriptorTypeMap[TypeInfoString.size()];
if (TypeDescriptorType)
return TypeDescriptorType;
llvm::Type *FieldTypes[] = {
CGM.Int8PtrPtrTy,
CGM.Int8PtrTy,
llvm::ArrayType::get(CGM.Int8Ty, TypeInfoString.size() + 1)};
TypeDescriptorType =
llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, TDTypeName);
return TypeDescriptorType;
}
llvm::Type *getImageRelativeType(llvm::Type *PtrType) {
if (!isImageRelative())
return PtrType;
return CGM.IntTy;
}
llvm::StructType *getBaseClassDescriptorType() {
if (BaseClassDescriptorType)
return BaseClassDescriptorType;
llvm::Type *FieldTypes[] = {
getImageRelativeType(CGM.Int8PtrTy),
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
getImageRelativeType(getClassHierarchyDescriptorType()->getPointerTo()),
};
BaseClassDescriptorType = llvm::StructType::create(
CGM.getLLVMContext(), FieldTypes, "rtti.BaseClassDescriptor");
return BaseClassDescriptorType;
}
llvm::StructType *getClassHierarchyDescriptorType() {
if (ClassHierarchyDescriptorType)
return ClassHierarchyDescriptorType;
// Forward-declare RTTIClassHierarchyDescriptor to break a cycle.
ClassHierarchyDescriptorType = llvm::StructType::create(
CGM.getLLVMContext(), "rtti.ClassHierarchyDescriptor");
llvm::Type *FieldTypes[] = {
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
getImageRelativeType(
getBaseClassDescriptorType()->getPointerTo()->getPointerTo()),
};
ClassHierarchyDescriptorType->setBody(FieldTypes);
return ClassHierarchyDescriptorType;
}
llvm::StructType *getCompleteObjectLocatorType() {
if (CompleteObjectLocatorType)
return CompleteObjectLocatorType;
CompleteObjectLocatorType = llvm::StructType::create(
CGM.getLLVMContext(), "rtti.CompleteObjectLocator");
llvm::Type *FieldTypes[] = {
CGM.IntTy,
CGM.IntTy,
CGM.IntTy,
getImageRelativeType(CGM.Int8PtrTy),
getImageRelativeType(getClassHierarchyDescriptorType()->getPointerTo()),
getImageRelativeType(CompleteObjectLocatorType),
};
llvm::ArrayRef<llvm::Type *> FieldTypesRef(FieldTypes);
if (!isImageRelative())
FieldTypesRef = FieldTypesRef.drop_back();
CompleteObjectLocatorType->setBody(FieldTypesRef);
return CompleteObjectLocatorType;
}
llvm::GlobalVariable *getImageBase() {
StringRef Name = "__ImageBase";
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name))
return GV;
return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty,
/*isConstant=*/true,
llvm::GlobalValue::ExternalLinkage,
/*Initializer=*/nullptr, Name);
}
llvm::Constant *getImageRelativeConstant(llvm::Constant *PtrVal) {
if (!isImageRelative())
return PtrVal;
if (PtrVal->isNullValue())
return llvm::Constant::getNullValue(CGM.IntTy);
llvm::Constant *ImageBaseAsInt =
llvm::ConstantExpr::getPtrToInt(getImageBase(), CGM.IntPtrTy);
llvm::Constant *PtrValAsInt =
llvm::ConstantExpr::getPtrToInt(PtrVal, CGM.IntPtrTy);
llvm::Constant *Diff =
llvm::ConstantExpr::getSub(PtrValAsInt, ImageBaseAsInt,
/*HasNUW=*/true, /*HasNSW=*/true);
return llvm::ConstantExpr::getTrunc(Diff, CGM.IntTy);
}
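// Illustrative sketch (hypothetical addresses): with a 64-bit image based at
// 0x140000000, a descriptor placed at 0x140012340 is stored as the i32 RVA
//   0x140012340 - 0x140000000 = 0x12340
// keeping RTTI fields 4 bytes wide regardless of pointer size; consumers add
// the address of __ImageBase back to recover the full pointer.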
private:
MicrosoftMangleContext &getMangleContext() {
return cast<MicrosoftMangleContext>(CodeGen::CGCXXABI::getMangleContext());
}
llvm::Constant *getZeroInt() {
return llvm::ConstantInt::get(CGM.IntTy, 0);
}
llvm::Constant *getAllOnesInt() {
return llvm::Constant::getAllOnesValue(CGM.IntTy);
}
CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) override;
void
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields);
/// \brief Shared code for virtual base adjustment. Returns the offset from
/// the vbptr to the virtual base. Optionally returns the address of the
/// vbptr itself.
llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
Address Base,
llvm::Value *VBPtrOffset,
llvm::Value *VBTableOffset,
llvm::Value **VBPtr = nullptr);
llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
Address Base,
int32_t VBPtrOffset,
int32_t VBTableOffset,
llvm::Value **VBPtr = nullptr) {
assert(VBTableOffset % 4 == 0 && "should be byte offset into table of i32s");
llvm::Value *VBPOffset = llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset),
*VBTOffset = llvm::ConstantInt::get(CGM.IntTy, VBTableOffset);
return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr);
}
std::pair<Address, llvm::Value *>
performBaseAdjustment(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy);
/// \brief Performs a full virtual base adjustment. Used to dereference
/// pointers to members of virtual bases.
llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E,
const CXXRecordDecl *RD, Address Base,
llvm::Value *VirtualBaseAdjustmentOffset,
llvm::Value *VBPtrOffset /* optional */);
/// \brief Emits a full member pointer with the fields common to data and
/// function member pointers.
llvm::Constant *EmitFullMemberPointer(llvm::Constant *FirstField,
bool IsMemberFunction,
const CXXRecordDecl *RD,
CharUnits NonVirtualBaseAdjustment,
unsigned VBTableIndex);
bool MemberPointerConstantIsNull(const MemberPointerType *MPT,
llvm::Constant *MP);
/// \brief - Initialize all vbptrs of 'this' with RD as the complete type.
void EmitVBPtrStores(CodeGenFunction &CGF, const CXXRecordDecl *RD);
/// \brief Caching wrapper around VBTableBuilder::enumerateVBTables().
const VBTableGlobals &enumerateVBTables(const CXXRecordDecl *RD);
/// \brief Generate a thunk for calling a virtual member function MD.
llvm::Function *EmitVirtualMemPtrThunk(
const CXXMethodDecl *MD,
const MicrosoftVTableContext::MethodVFTableLocation &ML);
public:
llvm::Type *ConvertMemberPointerType(const MemberPointerType *MPT) override;
bool isZeroInitializable(const MemberPointerType *MPT) override;
bool isMemberPointerConvertible(const MemberPointerType *MPT) const override {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
return RD->hasAttr<MSInheritanceAttr>();
}
llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) override;
llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD) override;
llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) override;
llvm::Value *EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
llvm::Value *
EmitMemberDataPointerAddress(CodeGenFunction &CGF, const Expr *E,
Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
llvm::Value *EmitNonNullMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
CastKind CK, CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Value *Src,
CGBuilderTy &Builder);
llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) override;
llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) override;
llvm::Constant *EmitMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
CastKind CK, CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Constant *Src);
CGCallee
EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, const Expr *E,
Address This, llvm::Value *&ThisPtrForCall,
llvm::Value *MemPtr,
const MemberPointerType *MPT) override;
void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
llvm::StructType *getCatchableTypeType() {
if (CatchableTypeType)
return CatchableTypeType;
llvm::Type *FieldTypes[] = {
CGM.IntTy, // Flags
getImageRelativeType(CGM.Int8PtrTy), // TypeDescriptor
CGM.IntTy, // NonVirtualAdjustment
CGM.IntTy, // OffsetToVBPtr
CGM.IntTy, // VBTableIndex
CGM.IntTy, // Size
getImageRelativeType(CGM.Int8PtrTy) // CopyCtor
};
CatchableTypeType = llvm::StructType::create(
CGM.getLLVMContext(), FieldTypes, "eh.CatchableType");
return CatchableTypeType;
}
llvm::StructType *getCatchableTypeArrayType(uint32_t NumEntries) {
llvm::StructType *&CatchableTypeArrayType =
CatchableTypeArrayTypeMap[NumEntries];
if (CatchableTypeArrayType)
return CatchableTypeArrayType;
llvm::SmallString<23> CTATypeName("eh.CatchableTypeArray.");
CTATypeName += llvm::utostr(NumEntries);
llvm::Type *CTType =
getImageRelativeType(getCatchableTypeType()->getPointerTo());
llvm::Type *FieldTypes[] = {
CGM.IntTy, // NumEntries
llvm::ArrayType::get(CTType, NumEntries) // CatchableTypes
};
CatchableTypeArrayType =
llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, CTATypeName);
return CatchableTypeArrayType;
}
llvm::StructType *getThrowInfoType() {
if (ThrowInfoType)
return ThrowInfoType;
llvm::Type *FieldTypes[] = {
CGM.IntTy, // Flags
getImageRelativeType(CGM.Int8PtrTy), // CleanupFn
getImageRelativeType(CGM.Int8PtrTy), // ForwardCompat
getImageRelativeType(CGM.Int8PtrTy) // CatchableTypeArray
};
ThrowInfoType = llvm::StructType::create(CGM.getLLVMContext(), FieldTypes,
"eh.ThrowInfo");
return ThrowInfoType;
}
llvm::Constant *getThrowFn() {
// _CxxThrowException is passed an exception object and a ThrowInfo object
// which describes the exception.
llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()};
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
auto *Fn = cast<llvm::Function>(
CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"));
// _CxxThrowException is stdcall on 32-bit x86 platforms.
if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86)
Fn->setCallingConv(llvm::CallingConv::X86_StdCall);
return Fn;
}
llvm::Function *getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT);
llvm::Constant *getCatchableType(QualType T,
uint32_t NVOffset = 0,
int32_t VBPtrOffset = -1,
uint32_t VBIndex = 0);
llvm::GlobalVariable *getCatchableTypeArray(QualType T);
llvm::GlobalVariable *getThrowInfo(QualType T) override;
private:
typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy;
typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy;
typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalValue *> VFTablesMapTy;
/// \brief All the vftables that have been referenced.
VFTablesMapTy VFTablesMap;
VTablesMapTy VTablesMap;
/// \brief This set holds the record decls we've deferred vtable emission for.
llvm::SmallPtrSet<const CXXRecordDecl *, 4> DeferredVFTables;
/// \brief All the vbtables which have been referenced.
llvm::DenseMap<const CXXRecordDecl *, VBTableGlobals> VBTablesMap;
/// Info on the global variable used to guard initialization of static locals.
/// The BitIndex field is only used for externally invisible declarations.
struct GuardInfo {
GuardInfo() : Guard(nullptr), BitIndex(0) {}
llvm::GlobalVariable *Guard;
unsigned BitIndex;
};
/// Map from DeclContext to the current guard variable. We assume that the
/// AST is visited in source code order.
llvm::DenseMap<const DeclContext *, GuardInfo> GuardVariableMap;
llvm::DenseMap<const DeclContext *, GuardInfo> ThreadLocalGuardVariableMap;
llvm::DenseMap<const DeclContext *, unsigned> ThreadSafeGuardNumMap;
llvm::DenseMap<size_t, llvm::StructType *> TypeDescriptorTypeMap;
llvm::StructType *BaseClassDescriptorType;
llvm::StructType *ClassHierarchyDescriptorType;
llvm::StructType *CompleteObjectLocatorType;
llvm::DenseMap<QualType, llvm::GlobalVariable *> CatchableTypeArrays;
llvm::StructType *CatchableTypeType;
llvm::DenseMap<uint32_t, llvm::StructType *> CatchableTypeArrayTypeMap;
llvm::StructType *ThrowInfoType;
};
}
CGCXXABI::RecordArgABI
MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
switch (CGM.getTarget().getTriple().getArch()) {
default:
// FIXME: Implement for other architectures.
return RAA_Default;
case llvm::Triple::thumb:
// Use the simple Itanium rules for now.
// FIXME: This is incompatible with MSVC for arguments with a dtor and no
// copy ctor.
return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default;
case llvm::Triple::x86:
// All record arguments are passed in memory on x86. Decide whether to
// construct the object directly in argument memory, or to construct the
// argument elsewhere and copy the bytes during the call.
// If C++ prohibits us from making a copy, construct the arguments directly
// into argument memory.
if (!canCopyArgument(RD))
return RAA_DirectInMemory;
// Otherwise, construct the argument into a temporary and copy the bytes
// into the outgoing argument memory.
return RAA_Default;
case llvm::Triple::x86_64:
- // Win64 passes objects with non-trivial copy ctors indirectly.
- if (RD->hasNonTrivialCopyConstructor())
- return RAA_Indirect;
-
- // If an object has a destructor, we'd really like to pass it indirectly
+ // If a class has a destructor, we'd really like to pass it indirectly
// because it allows us to elide copies. Unfortunately, MSVC makes that
// impossible for small types, which it will pass in a single register or
// stack slot. Most objects with dtors are large-ish, so handle that early.
// We can't call out all large objects as being indirect because there are
// multiple x64 calling conventions and the C++ ABI code shouldn't dictate
// how we pass large POD types.
+ //
+ // Note: This permits small classes with nontrivial destructors to be
+ // passed in registers, which is non-conforming.
if (RD->hasNonTrivialDestructor() &&
getContext().getTypeSize(RD->getTypeForDecl()) > 64)
return RAA_Indirect;
- // If this is true, the implicit copy constructor that Sema would have
- // created would not be deleted. FIXME: We should provide a more direct way
- // for CodeGen to ask whether the constructor was deleted.
- if (!RD->hasUserDeclaredCopyConstructor() &&
- !RD->hasUserDeclaredMoveConstructor() &&
- !RD->needsOverloadResolutionForMoveConstructor() &&
- !RD->hasUserDeclaredMoveAssignment() &&
- !RD->needsOverloadResolutionForMoveAssignment())
- return RAA_Default;
-
- // Otherwise, Sema should have created an implicit copy constructor if
- // needed.
- assert(!RD->needsImplicitCopyConstructor());
-
- // We have to make sure the trivial copy constructor isn't deleted.
- for (const CXXConstructorDecl *CD : RD->ctors()) {
- if (CD->isCopyConstructor()) {
- assert(CD->isTrivial());
- // We had at least one undeleted trivial copy ctor. Return directly.
- if (!CD->isDeleted())
- return RAA_Default;
+ // If a class has at least one non-deleted, trivial copy constructor, it
+ // is passed according to the C ABI. Otherwise, it is passed indirectly.
+ //
+ // Note: This permits classes with non-trivial copy or move ctors to be
+ // passed in registers, so long as they *also* have a trivial copy ctor,
+ // which is non-conforming.
+ if (RD->needsImplicitCopyConstructor()) {
+ // If the copy ctor has not yet been declared, we can read its triviality
+ // off the AST.
+ if (!RD->defaultedCopyConstructorIsDeleted() &&
+ RD->hasTrivialCopyConstructor())
+ return RAA_Default;
+ } else {
+ // Otherwise, we need to find the copy constructor(s) and ask.
+ for (const CXXConstructorDecl *CD : RD->ctors()) {
+ if (CD->isCopyConstructor()) {
+ // We had at least one nondeleted trivial copy ctor. Return directly.
+ if (!CD->isDeleted() && CD->isTrivial())
+ return RAA_Default;
+ }
}
}
- // The trivial copy constructor was deleted. Return indirectly.
+ // We have no trivial, non-deleted copy constructor.
return RAA_Indirect;
}
llvm_unreachable("invalid enum");
}
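// Editorial sketch (not part of this change): how the revised x86_64 rules
// above classify a few hypothetical records. 'Small' has only trivial special
// members, so it takes the C ABI path; 'Big' has a non-trivial dtor and is
// wider than 64 bits, so it is passed indirectly; 'Mixed' is the
// non-conforming case noted above, where a trivial copy ctor coexists with a
// non-trivial move ctor and the class is still passed directly.
struct Small { int x; };               // RAA_Default
struct Big { char buf[16]; ~Big(); };  // RAA_Indirect: dtor and size > 64 bits
struct Mixed {
  Mixed(const Mixed &) = default;      // trivial, non-deleted copy ctor...
  Mixed(Mixed &&) noexcept {}          // ...beside a non-trivial move ctor
  int x;
};                                     // RAA_Default (non-conforming)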
void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
const CXXDeleteExpr *DE,
Address Ptr,
QualType ElementType,
const CXXDestructorDecl *Dtor) {
// FIXME: Provide a source location here even though there's no
// CXXMemberCallExpr for dtor call.
bool UseGlobalDelete = DE->isGlobalDelete();
CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
llvm::Value *MDThis =
EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
if (UseGlobalDelete)
CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
}
void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
llvm::Value *Args[] = {
llvm::ConstantPointerNull::get(CGM.Int8PtrTy),
llvm::ConstantPointerNull::get(getThrowInfoType()->getPointerTo())};
auto *Fn = getThrowFn();
if (isNoReturn)
CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, Args);
else
CGF.EmitRuntimeCallOrInvoke(Fn, Args);
}
namespace {
struct CatchRetScope final : EHScopeStack::Cleanup {
llvm::CatchPadInst *CPI;
CatchRetScope(llvm::CatchPadInst *CPI) : CPI(CPI) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::BasicBlock *BB = CGF.createBasicBlock("catchret.dest");
CGF.Builder.CreateCatchRet(CPI, BB);
CGF.EmitBlock(BB);
}
};
}
void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF,
const CXXCatchStmt *S) {
// In the MS ABI, the runtime handles the copy, and the catch handler is
// responsible for destruction.
VarDecl *CatchParam = S->getExceptionDecl();
llvm::BasicBlock *CatchPadBB = CGF.Builder.GetInsertBlock();
llvm::CatchPadInst *CPI =
cast<llvm::CatchPadInst>(CatchPadBB->getFirstNonPHI());
CGF.CurrentFuncletPad = CPI;
// If this is a catch-all or the catch parameter is unnamed, we don't need to
// emit an alloca for the object.
if (!CatchParam || !CatchParam->getDeclName()) {
CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
return;
}
CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
CPI->setArgOperand(2, var.getObjectAddress(CGF).getPointer());
CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
CGF.EmitAutoVarCleanups(var);
}
/// We need to perform a generic polymorphic operation (like a typeid
/// or a cast), which requires an object with a vfptr. Adjust the
/// address to point to an object with a vfptr.
std::pair<Address, llvm::Value *>
MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy) {
Value = CGF.Builder.CreateBitCast(Value, CGF.Int8PtrTy);
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
const ASTContext &Context = getContext();
// If the class itself has a vfptr, great. This check implicitly
// covers non-virtual base subobjects: a class with its own virtual
// functions would be a candidate to be a primary base.
if (Context.getASTRecordLayout(SrcDecl).hasExtendableVFPtr())
return std::make_pair(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0));
// Okay, one of the vbases must have a vfptr, or else this isn't
// actually a polymorphic class.
const CXXRecordDecl *PolymorphicBase = nullptr;
for (auto &Base : SrcDecl->vbases()) {
const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
if (Context.getASTRecordLayout(BaseDecl).hasExtendableVFPtr()) {
PolymorphicBase = BaseDecl;
break;
}
}
assert(PolymorphicBase && "polymorphic class has no apparent vfptr?");
llvm::Value *Offset =
GetVirtualBaseClassOffset(CGF, Value, SrcDecl, PolymorphicBase);
llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset);
CharUnits VBaseAlign =
CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase);
return std::make_pair(Address(Ptr, VBaseAlign), Offset);
}
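// Editorial sketch (hypothetical types): a hierarchy that exercises the vbase
// search above. 'D' declares no virtual functions of its own, so it has no
// vfptr at offset zero; its only vfptr lives in the virtual base 'B', and a
// typeid or dynamic_cast on a D* must first be adjusted to the B subobject.
struct B { virtual void f(); };
struct D : virtual B { int x; };  // hasExtendableVFPtr() is false for D itself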
bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref,
QualType SrcRecordTy) {
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
return IsDeref &&
!getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
}
static llvm::CallSite emitRTtypeidCall(CodeGenFunction &CGF,
llvm::Value *Argument) {
llvm::Type *ArgTypes[] = {CGF.Int8PtrTy};
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false);
llvm::Value *Args[] = {Argument};
llvm::Constant *Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid");
return CGF.EmitRuntimeCallOrInvoke(Fn, Args);
}
void MicrosoftCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) {
llvm::CallSite Call =
emitRTtypeidCall(CGF, llvm::Constant::getNullValue(CGM.VoidPtrTy));
Call.setDoesNotReturn();
CGF.Builder.CreateUnreachable();
}
llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF,
QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) {
std::tie(ThisPtr, std::ignore) =
performBaseAdjustment(CGF, ThisPtr, SrcRecordTy);
auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction();
return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy);
}
bool MicrosoftCXXABI::shouldDynamicCastCallBeNullChecked(bool SrcIsPtr,
QualType SrcRecordTy) {
const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
return SrcIsPtr &&
!getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
}
llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall(
CodeGenFunction &CGF, Address This, QualType SrcRecordTy,
QualType DestTy, QualType DestRecordTy, llvm::BasicBlock *CastEnd) {
llvm::Type *DestLTy = CGF.ConvertType(DestTy);
llvm::Value *SrcRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(SrcRecordTy.getUnqualifiedType());
llvm::Value *DestRTTI =
CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType());
llvm::Value *Offset;
std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy);
llvm::Value *ThisPtr = This.getPointer();
Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty);
// PVOID __RTDynamicCast(
// PVOID inptr,
// LONG VfDelta,
// PVOID SrcType,
// PVOID TargetType,
// BOOL isReference)
llvm::Type *ArgTypes[] = {CGF.Int8PtrTy, CGF.Int32Ty, CGF.Int8PtrTy,
CGF.Int8PtrTy, CGF.Int32Ty};
llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction(
llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false),
"__RTDynamicCast");
llvm::Value *Args[] = {
ThisPtr, Offset, SrcRTTI, DestRTTI,
llvm::ConstantInt::get(CGF.Int32Ty, DestTy->isReferenceType())};
ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args).getInstruction();
return CGF.Builder.CreateBitCast(ThisPtr, DestLTy);
}
llvm::Value *
MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) {
std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy);
// PVOID __RTCastToVoid(
// PVOID inptr)
llvm::Type *ArgTypes[] = {CGF.Int8PtrTy};
llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction(
llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false),
"__RTCastToVoid");
llvm::Value *Args[] = {Value.getPointer()};
return CGF.EmitRuntimeCall(Function, Args);
}
bool MicrosoftCXXABI::EmitBadCastCall(CodeGenFunction &CGF) {
return false;
}
llvm::Value *MicrosoftCXXABI::GetVirtualBaseClassOffset(
CodeGenFunction &CGF, Address This, const CXXRecordDecl *ClassDecl,
const CXXRecordDecl *BaseClassDecl) {
const ASTContext &Context = getContext();
int64_t VBPtrChars =
Context.getASTRecordLayout(ClassDecl).getVBPtrOffset().getQuantity();
llvm::Value *VBPtrOffset = llvm::ConstantInt::get(CGM.PtrDiffTy, VBPtrChars);
CharUnits IntSize = Context.getTypeSizeInChars(Context.IntTy);
CharUnits VBTableChars =
IntSize *
CGM.getMicrosoftVTableContext().getVBTableIndex(ClassDecl, BaseClassDecl);
llvm::Value *VBTableOffset =
llvm::ConstantInt::get(CGM.IntTy, VBTableChars.getQuantity());
llvm::Value *VBPtrToNewBase =
GetVBaseOffsetFromVBPtr(CGF, This, VBPtrOffset, VBTableOffset);
VBPtrToNewBase =
CGF.Builder.CreateSExtOrBitCast(VBPtrToNewBase, CGM.PtrDiffTy);
return CGF.Builder.CreateNSWAdd(VBPtrOffset, VBPtrToNewBase);
}
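// Editorial sketch: the arithmetic performed above, spelled out for a
// hypothetical object whose vbptr sits at vbptr_offset and whose target vbase
// has vbtable index 2 (4-byte i32 slots):
//   int *vbtable     = *(int **)((char *)this + vbptr_offset);
//   int  vbase_delta = vbtable[2];                 // GetVBaseOffsetFromVBPtr
//   ptrdiff_t total  = vbptr_offset + vbase_delta; // the NSW add above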
bool MicrosoftCXXABI::HasThisReturn(GlobalDecl GD) const {
return isa<CXXConstructorDecl>(GD.getDecl());
}
static bool isDeletingDtor(GlobalDecl GD) {
return isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() == Dtor_Deleting;
}
bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const {
return isDeletingDtor(GD);
}
bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
if (!RD)
return false;
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
if (FI.isInstanceMethod()) {
// If it's an instance method, aggregates are always returned indirectly via
// the second parameter.
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
return true;
} else if (!RD->isPOD()) {
// If it's a free function, non-POD types are returned indirectly.
FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
return true;
}
// Otherwise, use the C ABI rules.
return false;
}
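// Editorial sketch (hypothetical types): the three outcomes of
// classifyReturnType above.
//   struct POD { int x, y; };
//   POD free_pod();        // false: C ABI, may be returned in registers
//   struct NonPOD { NonPOD(const NonPOD &); int x; };
//   NonPOD free_nonpod();  // true: returned indirectly via hidden pointer
//   struct S { POD m(); }; // true: instance method, sret placed after 'this'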
llvm::BasicBlock *
MicrosoftCXXABI::EmitCtorCompleteObjectHandler(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
llvm::Value *IsMostDerivedClass = getStructorImplicitParamValue(CGF);
assert(IsMostDerivedClass &&
"ctor for a class with virtual bases must have an implicit parameter");
llvm::Value *IsCompleteObject =
CGF.Builder.CreateIsNotNull(IsMostDerivedClass, "is_complete_object");
llvm::BasicBlock *CallVbaseCtorsBB = CGF.createBasicBlock("ctor.init_vbases");
llvm::BasicBlock *SkipVbaseCtorsBB = CGF.createBasicBlock("ctor.skip_vbases");
CGF.Builder.CreateCondBr(IsCompleteObject,
CallVbaseCtorsBB, SkipVbaseCtorsBB);
CGF.EmitBlock(CallVbaseCtorsBB);
// Fill in the vbtable pointers here.
EmitVBPtrStores(CGF, RD);
// CGF will put the base ctor calls in this basic block for us later.
return SkipVbaseCtorsBB;
}
llvm::BasicBlock *
MicrosoftCXXABI::EmitDtorCompleteObjectHandler(CodeGenFunction &CGF) {
llvm::Value *IsMostDerivedClass = getStructorImplicitParamValue(CGF);
assert(IsMostDerivedClass &&
"ctor for a class with virtual bases must have an implicit parameter");
llvm::Value *IsCompleteObject =
CGF.Builder.CreateIsNotNull(IsMostDerivedClass, "is_complete_object");
llvm::BasicBlock *CallVbaseDtorsBB = CGF.createBasicBlock("Dtor.dtor_vbases");
llvm::BasicBlock *SkipVbaseDtorsBB = CGF.createBasicBlock("Dtor.skip_vbases");
CGF.Builder.CreateCondBr(IsCompleteObject,
CallVbaseDtorsBB, SkipVbaseDtorsBB);
CGF.EmitBlock(CallVbaseDtorsBB);
// CGF will put the base dtor calls in this basic block for us later.
return SkipVbaseDtorsBB;
}
void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers(
CodeGenFunction &CGF, const CXXRecordDecl *RD) {
// In most cases, an override for a vbase virtual method can adjust
// the "this" parameter by applying a constant offset.
// However, this is not enough while a constructor or a destructor of some
// class X is being executed if all the following conditions are met:
// - X has virtual bases, (1)
// - X overrides a virtual method M of a vbase Y, (2)
// - X itself is a vbase of the most derived class.
//
// If (1) and (2) are true, the vtorDisp for vbase Y is a hidden member of X
// which holds the extra amount of "this" adjustment we must do when we use
// the X vftables (i.e. during X ctor or dtor).
// Outside the ctors and dtors, the values of vtorDisps are zero.
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
typedef ASTRecordLayout::VBaseOffsetsMapTy VBOffsets;
const VBOffsets &VBaseMap = Layout.getVBaseOffsetsMap();
CGBuilderTy &Builder = CGF.Builder;
unsigned AS = getThisAddress(CGF).getAddressSpace();
llvm::Value *Int8This = nullptr; // Initialize lazily.
for (VBOffsets::const_iterator I = VBaseMap.begin(), E = VBaseMap.end();
I != E; ++I) {
if (!I->second.hasVtorDisp())
continue;
llvm::Value *VBaseOffset =
GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first);
uint64_t ConstantVBaseOffset =
Layout.getVBaseClassOffset(I->first).getQuantity();
// vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase).
llvm::Value *VtorDispValue = Builder.CreateSub(
VBaseOffset, llvm::ConstantInt::get(CGM.PtrDiffTy, ConstantVBaseOffset),
"vtordisp.value");
VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty);
if (!Int8This)
Int8This = Builder.CreateBitCast(getThisValue(CGF),
CGF.Int8Ty->getPointerTo(AS));
llvm::Value *VtorDispPtr = Builder.CreateInBoundsGEP(Int8This, VBaseOffset);
// vtorDisp is always the 32 bits before the vbase in the class layout.
VtorDispPtr = Builder.CreateConstGEP1_32(VtorDispPtr, -4);
VtorDispPtr = Builder.CreateBitCast(
VtorDispPtr, CGF.Int32Ty->getPointerTo(AS), "vtordisp.ptr");
Builder.CreateAlignedStore(VtorDispValue, VtorDispPtr,
CharUnits::fromQuantity(4));
}
}
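// Editorial sketch (hypothetical hierarchy): the vtorDisp conditions
// enumerated above.
//   struct Y { virtual void m(); };               // the vbase declaring m
//   struct X : virtual Y { void m() override; };  // (1)+(2): X carries a
//                                                 // vtordisp slot for Y
//   struct Z : virtual X { };                     // X is itself a vbase of
//                                                 // the most derived class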
static bool hasDefaultCXXMethodCC(ASTContext &Context,
const CXXMethodDecl *MD) {
CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
CallingConv ActualCallingConv =
MD->getType()->getAs<FunctionProtoType>()->getCallConv();
return ExpectedCallingConv == ActualCallingConv;
}
void MicrosoftCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
// There's only one constructor type in this ABI.
CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
// Exported default constructors either have a simple call-site where they use
// the typical calling convention and have a single 'this' pointer for an
// argument, or they get a wrapper function which appropriately thunks to the
// real default constructor. This thunk is the default constructor closure.
if (D->hasAttr<DLLExportAttr>() && D->isDefaultConstructor())
if (!hasDefaultCXXMethodCC(getContext(), D) || D->getNumParams() != 0) {
llvm::Function *Fn = getAddrOfCXXCtorClosure(D, Ctor_DefaultClosure);
Fn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
Fn->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
}
}
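// Editorial sketch (hypothetical type): a constructor taking the closure path
// above. On 32-bit x86 the default method calling convention is __thiscall,
// so an exported default ctor with a different convention, or one that is a
// default ctor only thanks to defaulted parameters, gets the weak, exported
// default-constructor-closure thunk instead of the simple call-site form.
//   struct __declspec(dllexport) S {
//     S(int defaulted = 0);  // default ctor, but getNumParams() != 0
//   };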
void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
const CXXRecordDecl *RD) {
Address This = getThisAddress(CGF);
This = CGF.Builder.CreateElementBitCast(This, CGM.Int8Ty, "this.int8");
const ASTContext &Context = getContext();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
const std::unique_ptr<VPtrInfo> &VBT = (*VBGlobals.VBTables)[I];
llvm::GlobalVariable *GV = VBGlobals.Globals[I];
const ASTRecordLayout &SubobjectLayout =
Context.getASTRecordLayout(VBT->IntroducingObject);
CharUnits Offs = VBT->NonVirtualOffset;
Offs += SubobjectLayout.getVBPtrOffset();
if (VBT->getVBaseWithVPtr())
Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr());
Address VBPtr = CGF.Builder.CreateConstInBoundsByteGEP(This, Offs);
llvm::Value *GVPtr =
CGF.Builder.CreateConstInBoundsGEP2_32(GV->getValueType(), GV, 0, 0);
VBPtr = CGF.Builder.CreateElementBitCast(VBPtr, GVPtr->getType(),
"vbptr." + VBT->ObjectWithVPtr->getName());
CGF.Builder.CreateStore(GVPtr, VBPtr);
}
}
CGCXXABI::AddedStructorArgs
MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
SmallVectorImpl<CanQualType> &ArgTys) {
AddedStructorArgs Added;
// TODO: 'for base' flag
if (T == StructorType::Deleting) {
// The scalar deleting destructor takes an implicit int parameter.
ArgTys.push_back(getContext().IntTy);
++Added.Suffix;
}
auto *CD = dyn_cast<CXXConstructorDecl>(MD);
if (!CD)
return Added;
// All parameters are already in place except is_most_derived, which goes
// after 'this' if it's variadic and last if it's not.
const CXXRecordDecl *Class = CD->getParent();
const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>();
if (Class->getNumVBases()) {
if (FPT->isVariadic()) {
ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy);
++Added.Prefix;
} else {
ArgTys.push_back(getContext().IntTy);
++Added.Suffix;
}
}
return Added;
}
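// Editorial sketch (hypothetical type): where the extra int lands for a class
// with virtual bases, per the variadic check above.
//   struct S : virtual B {
//     S(int, ...);  // lowered as S(S *this, int is_most_derived, int, ...)
//     S(int);       // lowered as S(S *this, int, int is_most_derived)
//   };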
void MicrosoftCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) {
// The TU defining a dtor is only guaranteed to emit a base destructor. All
// other destructor variants are delegating thunks.
CGM.EmitGlobal(GlobalDecl(D, Dtor_Base));
}
CharUnits
MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) {
GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
GlobalDecl LookupGD = GD;
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
// Complete destructors take a pointer to the complete object as a
// parameter, thus don't need this adjustment.
if (GD.getDtorType() == Dtor_Complete)
return CharUnits();
// There's no Dtor_Base in the vftable, but it shares the this adjustment with
// the deleting one, so look it up instead.
LookupGD = GlobalDecl(DD, Dtor_Deleting);
}
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
CharUnits Adjustment = ML.VFPtrOffset;
// Normal virtual instance methods need to adjust from the vfptr that first
// defined the virtual method to the virtual base subobject, but destructors
// do not. The vector deleting destructor thunk applies this adjustment for
// us if necessary.
if (isa<CXXDestructorDecl>(MD))
Adjustment = CharUnits::Zero();
if (ML.VBase) {
const ASTRecordLayout &DerivedLayout =
getContext().getASTRecordLayout(MD->getParent());
Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase);
}
return Adjustment;
}
Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall(
CodeGenFunction &CGF, GlobalDecl GD, Address This,
bool VirtualCall) {
if (!VirtualCall) {
// If the call of a virtual function is not virtual, we just have to
// compensate for the adjustment the virtual function does in its prologue.
CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
if (Adjustment.isZero())
return This;
This = CGF.Builder.CreateElementBitCast(This, CGF.Int8Ty);
assert(Adjustment.isPositive());
return CGF.Builder.CreateConstByteGEP(This, Adjustment);
}
GD = GD.getCanonicalDecl();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
GlobalDecl LookupGD = GD;
if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD)) {
// Complete dtors take a pointer to the complete object,
// thus don't need adjustment.
if (GD.getDtorType() == Dtor_Complete)
return This;
// There's only Dtor_Deleting in the vftable, but it shares the this adjustment
// with the base one, so look up the deleting one instead.
LookupGD = GlobalDecl(DD, Dtor_Deleting);
}
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD);
CharUnits StaticOffset = ML.VFPtrOffset;
// Base destructors expect 'this' to point to the beginning of the base
// subobject, not the first vfptr that happens to contain the virtual dtor.
// However, we still need to apply the virtual base adjustment.
if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base)
StaticOffset = CharUnits::Zero();
Address Result = This;
if (ML.VBase) {
Result = CGF.Builder.CreateElementBitCast(Result, CGF.Int8Ty);
const CXXRecordDecl *Derived = MD->getParent();
const CXXRecordDecl *VBase = ML.VBase;
llvm::Value *VBaseOffset =
GetVirtualBaseClassOffset(CGF, Result, Derived, VBase);
llvm::Value *VBasePtr =
CGF.Builder.CreateInBoundsGEP(Result.getPointer(), VBaseOffset);
CharUnits VBaseAlign =
CGF.CGM.getVBaseAlignment(Result.getAlignment(), Derived, VBase);
Result = Address(VBasePtr, VBaseAlign);
}
if (!StaticOffset.isZero()) {
assert(StaticOffset.isPositive());
Result = CGF.Builder.CreateElementBitCast(Result, CGF.Int8Ty);
if (ML.VBase) {
// Non-virtual adjustment might result in a pointer outside the allocated
// object, e.g. if the final overrider class is laid out after the virtual
// base that declares a method in the most derived class.
// FIXME: Update the code that emits this adjustment in thunks prologues.
Result = CGF.Builder.CreateConstByteGEP(Result, StaticOffset);
} else {
Result = CGF.Builder.CreateConstInBoundsByteGEP(Result, StaticOffset);
}
}
return Result;
}
void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
QualType &ResTy,
FunctionArgList &Params) {
ASTContext &Context = getContext();
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
auto *IsMostDerived = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("is_most_derived"), Context.IntTy,
ImplicitParamDecl::Other);
// The 'most_derived' parameter goes second if the ctor is variadic and last
// if it's not. Dtors can't be variadic.
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (FPT->isVariadic())
Params.insert(Params.begin() + 1, IsMostDerived);
else
Params.push_back(IsMostDerived);
getStructorImplicitParamDecl(CGF) = IsMostDerived;
} else if (isDeletingDtor(CGF.CurGD)) {
auto *ShouldDelete = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("should_call_delete"), Context.IntTy,
ImplicitParamDecl::Other);
Params.push_back(ShouldDelete);
getStructorImplicitParamDecl(CGF) = ShouldDelete;
}
}
llvm::Value *MicrosoftCXXABI::adjustThisParameterInVirtualFunctionPrologue(
CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) {
// In this ABI, every virtual function takes a pointer to one of the
// subobjects that first defines it as the 'this' parameter, rather than a
// pointer to the final overrider subobject. Thus, we need to adjust it back
// to the final overrider subobject before use.
// See comments in the MicrosoftVFTableContext implementation for the details.
CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD);
if (Adjustment.isZero())
return This;
unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS),
*thisTy = This->getType();
This = CGF.Builder.CreateBitCast(This, charPtrTy);
assert(Adjustment.isPositive());
This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
-Adjustment.getQuantity());
return CGF.Builder.CreateBitCast(This, thisTy);
}
void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
// Naked functions have no prolog.
if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>())
return;
EmitThisParam(CGF);
/// If this is a function that the ABI specifies returns 'this', initialize
/// the return slot to 'this' at the start of the function.
///
/// Unlike the setting of return types, this is done within the ABI
/// implementation instead of by clients of CGCXXABI because:
/// 1) getThisValue is currently protected
/// 2) in theory, an ABI could implement 'this' returns some other way;
/// HasThisReturn only specifies a contract, not the implementation
if (HasThisReturn(CGF.CurGD))
CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue);
else if (hasMostDerivedReturn(CGF.CurGD))
CGF.Builder.CreateStore(CGF.EmitCastToVoidPtr(getThisValue(CGF)),
CGF.ReturnValue);
const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl());
if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
assert(getStructorImplicitParamDecl(CGF) &&
"no implicit parameter for a constructor with virtual bases?");
getStructorImplicitParamValue(CGF)
= CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)),
"is_most_derived");
}
if (isDeletingDtor(CGF.CurGD)) {
assert(getStructorImplicitParamDecl(CGF) &&
"no implicit parameter for a deleting destructor?");
getStructorImplicitParamValue(CGF)
= CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(getStructorImplicitParamDecl(CGF)),
"should_call_delete");
}
}
CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating, CallArgList &Args) {
assert(Type == Ctor_Complete || Type == Ctor_Base);
// Check if we need a 'most_derived' parameter.
if (!D->getParent()->getNumVBases())
return AddedStructorArgs{};
// Add the 'most_derived' argument second if we are variadic or last if not.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
llvm::Value *MostDerivedArg;
if (Delegating) {
MostDerivedArg = getStructorImplicitParamValue(CGF);
} else {
MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
}
RValue RV = RValue::get(MostDerivedArg);
if (FPT->isVariadic()) {
Args.insert(Args.begin() + 1,
CallArg(RV, getContext().IntTy, /*needscopy=*/false));
return AddedStructorArgs::prefix(1);
}
Args.add(RV, getContext().IntTy);
return AddedStructorArgs::suffix(1);
}
void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *DD,
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) {
CGCallee Callee = CGCallee::forDirect(
CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
DD);
if (DD->isVirtual()) {
assert(Type != CXXDtorType::Dtor_Deleting &&
"The deleting destructor should only be called via a virtual call");
This = adjustThisArgumentForVirtualFunctionCall(CGF, GlobalDecl(DD, Type),
This, false);
}
llvm::BasicBlock *BaseDtorEndBB = nullptr;
if (ForVirtualBase && isa<CXXConstructorDecl>(CGF.CurCodeDecl)) {
BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
}
CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(),
/*ImplicitParam=*/nullptr,
/*ImplicitParamTy=*/QualType(), nullptr,
getFromDtorType(Type));
if (BaseDtorEndBB) {
// Complete object handler should continue to be the remaining block.
CGF.Builder.CreateBr(BaseDtorEndBB);
CGF.EmitBlock(BaseDtorEndBB);
}
}
void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable) {
if (!CGM.getCodeGenOpts().LTOUnit)
return;
// The location of the first virtual function pointer in the virtual table,
// aka the "address point" on Itanium. This is at offset 0 if RTTI is
// disabled, or sizeof(void*) if RTTI is enabled.
CharUnits AddressPoint =
getContext().getLangOpts().RTTIData
? getContext().toCharUnitsFromBits(
getContext().getTargetInfo().getPointerWidth(0))
: CharUnits::Zero();
if (Info.PathToIntroducingObject.empty()) {
CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD);
return;
}
// Add a bitset entry for the least derived base belonging to this vftable.
CGM.AddVTableTypeMetadata(VTable, AddressPoint,
Info.PathToIntroducingObject.back());
// Add a bitset entry for each derived class that is laid out at the same
// offset as the least derived base.
for (unsigned I = Info.PathToIntroducingObject.size() - 1; I != 0; --I) {
const CXXRecordDecl *DerivedRD = Info.PathToIntroducingObject[I - 1];
const CXXRecordDecl *BaseRD = Info.PathToIntroducingObject[I];
const ASTRecordLayout &Layout =
getContext().getASTRecordLayout(DerivedRD);
CharUnits Offset;
auto VBI = Layout.getVBaseOffsetsMap().find(BaseRD);
if (VBI == Layout.getVBaseOffsetsMap().end())
Offset = Layout.getBaseClassOffset(BaseRD);
else
Offset = VBI->second.VBaseOffset;
if (!Offset.isZero())
return;
CGM.AddVTableTypeMetadata(VTable, AddressPoint, DerivedRD);
}
// Finally do the same for the most derived class.
if (Info.FullOffsetInMDC.isZero())
CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD);
}
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
const VPtrInfoVector &VFPtrs = VFTContext.getVFPtrOffsets(RD);
for (const std::unique_ptr<VPtrInfo>& Info : VFPtrs) {
llvm::GlobalVariable *VTable = getAddrOfVTable(RD, Info->FullOffsetInMDC);
if (VTable->hasInitializer())
continue;
const VTableLayout &VTLayout =
VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC);
llvm::Constant *RTTI = nullptr;
if (any_of(VTLayout.vtable_components(),
[](const VTableComponent &VTC) { return VTC.isRTTIKind(); }))
RTTI = getMSCompleteObjectLocator(RD, *Info);
ConstantInitBuilder Builder(CGM);
auto Components = Builder.beginStruct();
CGVT.createVTableInitializer(Components, VTLayout, RTTI);
Components.finishAndSetAsInitializer(VTable);
emitVTableTypeMetadata(*Info, RD, VTable);
}
}
bool MicrosoftCXXABI::isVirtualOffsetNeededForVTableField(
CodeGenFunction &CGF, CodeGenFunction::VPtr Vptr) {
return Vptr.NearestVBase != nullptr;
}
llvm::Value *MicrosoftCXXABI::getVTableAddressPointInStructor(
CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
const CXXRecordDecl *NearestVBase) {
llvm::Constant *VTableAddressPoint = getVTableAddressPoint(Base, VTableClass);
if (!VTableAddressPoint) {
assert(Base.getBase()->getNumVBases() &&
!getContext().getASTRecordLayout(Base.getBase()).hasOwnVFPtr());
}
return VTableAddressPoint;
}
static void mangleVFTableName(MicrosoftMangleContext &MangleContext,
const CXXRecordDecl *RD, const VPtrInfo &VFPtr,
SmallString<256> &Name) {
llvm::raw_svector_ostream Out(Name);
MangleContext.mangleCXXVFTable(RD, VFPtr.MangledPath, Out);
}
llvm::Constant *
MicrosoftCXXABI::getVTableAddressPoint(BaseSubobject Base,
const CXXRecordDecl *VTableClass) {
(void)getAddrOfVTable(VTableClass, Base.getBaseOffset());
VFTableIdTy ID(VTableClass, Base.getBaseOffset());
return VFTablesMap[ID];
}
llvm::Constant *MicrosoftCXXABI::getVTableAddressPointForConstExpr(
BaseSubobject Base, const CXXRecordDecl *VTableClass) {
llvm::Constant *VFTable = getVTableAddressPoint(Base, VTableClass);
assert(VFTable && "Couldn't find a vftable for the given base?");
return VFTable;
}
llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
CharUnits VPtrOffset) {
// getAddrOfVTable may return 0 if asked to get an address of a vtable which
// shouldn't be used in the given record type. We want to cache this result in
// VFTablesMap, thus a simple zero check is not sufficient.
VFTableIdTy ID(RD, VPtrOffset);
VTablesMapTy::iterator I;
bool Inserted;
std::tie(I, Inserted) = VTablesMap.insert(std::make_pair(ID, nullptr));
if (!Inserted)
return I->second;
llvm::GlobalVariable *&VTable = I->second;
MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext();
const VPtrInfoVector &VFPtrs = VTContext.getVFPtrOffsets(RD);
if (DeferredVFTables.insert(RD).second) {
// We haven't processed this record type before.
// Queue up this vtable for possible deferred emission.
CGM.addDeferredVTable(RD);
#ifndef NDEBUG
// Create all the vftables at once in order to make sure each vftable has
// a unique mangled name.
llvm::StringSet<> ObservedMangledNames;
for (size_t J = 0, F = VFPtrs.size(); J != F; ++J) {
SmallString<256> Name;
mangleVFTableName(getMangleContext(), RD, *VFPtrs[J], Name);
if (!ObservedMangledNames.insert(Name.str()).second)
llvm_unreachable("Already saw this mangling before?");
}
#endif
}
const std::unique_ptr<VPtrInfo> *VFPtrI = std::find_if(
VFPtrs.begin(), VFPtrs.end(), [&](const std::unique_ptr<VPtrInfo>& VPI) {
return VPI->FullOffsetInMDC == VPtrOffset;
});
if (VFPtrI == VFPtrs.end()) {
VFTablesMap[ID] = nullptr;
return nullptr;
}
const std::unique_ptr<VPtrInfo> &VFPtr = *VFPtrI;
SmallString<256> VFTableName;
mangleVFTableName(getMangleContext(), RD, *VFPtr, VFTableName);
// Classes marked __declspec(dllimport) need vftables generated on the
// import-side in order to support features like constexpr. No other
// translation unit relies on the emission of the local vftable; translation
// units are expected to generate them as needed.
//
// Because of this unique behavior, we maintain this logic here instead of
// getVTableLinkage.
llvm::GlobalValue::LinkageTypes VFTableLinkage =
RD->hasAttr<DLLImportAttr>() ? llvm::GlobalValue::LinkOnceODRLinkage
: CGM.getVTableLinkage(RD);
bool VFTableComesFromAnotherTU =
llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage) ||
llvm::GlobalValue::isExternalLinkage(VFTableLinkage);
bool VTableAliasIsRequired =
!VFTableComesFromAnotherTU && getContext().getLangOpts().RTTIData;
if (llvm::GlobalValue *VFTable =
CGM.getModule().getNamedGlobal(VFTableName)) {
VFTablesMap[ID] = VFTable;
VTable = VTableAliasIsRequired
? cast<llvm::GlobalVariable>(
cast<llvm::GlobalAlias>(VFTable)->getBaseObject())
: cast<llvm::GlobalVariable>(VFTable);
return VTable;
}
const VTableLayout &VTLayout =
VTContext.getVFTableLayout(RD, VFPtr->FullOffsetInMDC);
llvm::GlobalValue::LinkageTypes VTableLinkage =
VTableAliasIsRequired ? llvm::GlobalValue::PrivateLinkage : VFTableLinkage;
StringRef VTableName = VTableAliasIsRequired ? StringRef() : VFTableName.str();
llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout);
// Create a backing variable for the contents of VTable. The VTable may
// or may not include space for a pointer to RTTI data.
llvm::GlobalValue *VFTable;
VTable = new llvm::GlobalVariable(CGM.getModule(), VTableType,
/*isConstant=*/true, VTableLinkage,
/*Initializer=*/nullptr, VTableName);
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
llvm::Comdat *C = nullptr;
if (!VFTableComesFromAnotherTU &&
(llvm::GlobalValue::isWeakForLinker(VFTableLinkage) ||
(llvm::GlobalValue::isLocalLinkage(VFTableLinkage) &&
VTableAliasIsRequired)))
C = CGM.getModule().getOrInsertComdat(VFTableName.str());
// Only insert a pointer into the VFTable for RTTI data if we are not
// importing it. We never reference the RTTI data directly so there is no
// need to make room for it.
if (VTableAliasIsRequired) {
llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.Int32Ty, 0),
llvm::ConstantInt::get(CGM.Int32Ty, 0),
llvm::ConstantInt::get(CGM.Int32Ty, 1)};
// Create a GEP which points just after the first entry in the VFTable,
// this should be the location of the first virtual method.
llvm::Constant *VTableGEP = llvm::ConstantExpr::getInBoundsGetElementPtr(
VTable->getValueType(), VTable, GEPIndices);
if (llvm::GlobalValue::isWeakForLinker(VFTableLinkage)) {
VFTableLinkage = llvm::GlobalValue::ExternalLinkage;
if (C)
C->setSelectionKind(llvm::Comdat::Largest);
}
VFTable = llvm::GlobalAlias::create(CGM.Int8PtrTy,
/*AddressSpace=*/0, VFTableLinkage,
VFTableName.str(), VTableGEP,
&CGM.getModule());
VFTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
} else {
// We don't need a GlobalAlias to be a symbol for the VTable if we won't
// be referencing any RTTI data.
// The GlobalVariable will end up being an appropriate definition of the
// VFTable.
VFTable = VTable;
}
if (C)
VTable->setComdat(C);
if (RD->hasAttr<DLLExportAttr>())
VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
VFTablesMap[ID] = VFTable;
return VTable;
}
CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
GlobalDecl GD,
Address This,
llvm::Type *Ty,
SourceLocation Loc) {
GD = GD.getCanonicalDecl();
CGBuilderTy &Builder = CGF.Builder;
Ty = Ty->getPointerTo()->getPointerTo();
Address VPtr =
adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(VPtr, Ty, MethodDecl->getParent());
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
MicrosoftVTableContext::MethodVFTableLocation ML =
VFTContext.getMethodVFTableLocation(GD);
// Compute the identity of the most derived class whose virtual table is
// located at the MethodVFTableLocation ML.
auto getObjectWithVPtr = [&] {
return llvm::find_if(VFTContext.getVFPtrOffsets(
ML.VBase ? ML.VBase : MethodDecl->getParent()),
[&](const std::unique_ptr<VPtrInfo> &Info) {
return Info->FullOffsetInMDC == ML.VFPtrOffset;
})
->get()
->ObjectWithVPtr;
};
llvm::Value *VFunc;
if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
VFunc = CGF.EmitVTableTypeCheckedLoad(
getObjectWithVPtr(), VTable,
ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
} else {
if (CGM.getCodeGenOpts().PrepareForLTO)
CGF.EmitTypeMetadataCodeForVCall(getObjectWithVPtr(), VTable, Loc);
llvm::Value *VFuncPtr =
Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
}
CGCallee Callee(MethodDecl, VFunc);
return Callee;
}
llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
Address This, const CXXMemberCallExpr *CE) {
assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
// We have only one destructor in the vftable but can get both behaviors
// by passing an implicit int parameter.
GlobalDecl GD(Dtor, Dtor_Deleting);
const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
Dtor, StructorType::Deleting);
llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
CGCallee Callee = getVirtualFunctionPointer(
CGF, GD, This, Ty, CE ? CE->getLocStart() : SourceLocation());
ASTContext &Context = getContext();
llvm::Value *ImplicitParam = llvm::ConstantInt::get(
llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
DtorType == Dtor_Deleting);
This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
RValue RV =
CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam,
Context.IntTy, CE, StructorType::Deleting);
return RV.getScalarVal();
}
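// Editorial sketch: the two source forms funneled through the single vftable
// slot above, distinguished only by the implicit int argument.
//   delete p;   // virtual call, ImplicitParam == 1 -> deleting destructor
//   p->~T();    // virtual call, ImplicitParam == 0 -> complete destructor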
const VBTableGlobals &
MicrosoftCXXABI::enumerateVBTables(const CXXRecordDecl *RD) {
// At this layer, we can key the cache off of a single class, which is much
// easier than caching each vbtable individually.
llvm::DenseMap<const CXXRecordDecl*, VBTableGlobals>::iterator Entry;
bool Added;
std::tie(Entry, Added) =
VBTablesMap.insert(std::make_pair(RD, VBTableGlobals()));
VBTableGlobals &VBGlobals = Entry->second;
if (!Added)
return VBGlobals;
MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
VBGlobals.VBTables = &Context.enumerateVBTables(RD);
// Cache the globals for all vbtables so we don't have to recompute the
// mangled names.
llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
for (VPtrInfoVector::const_iterator I = VBGlobals.VBTables->begin(),
E = VBGlobals.VBTables->end();
I != E; ++I) {
VBGlobals.Globals.push_back(getAddrOfVBTable(**I, RD, Linkage));
}
return VBGlobals;
}
llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
const CXXMethodDecl *MD,
const MicrosoftVTableContext::MethodVFTableLocation &ML) {
assert(!isa<CXXConstructorDecl>(MD) && !isa<CXXDestructorDecl>(MD) &&
"can't form pointers to ctors or virtual dtors");
// Calculate the mangled name.
SmallString<256> ThunkName;
llvm::raw_svector_ostream Out(ThunkName);
getMangleContext().mangleVirtualMemPtrThunk(MD, Out);
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
return cast<llvm::Function>(GV);
// Create the llvm::Function.
const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSMemberPointerThunk(MD);
llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Function *ThunkFn =
llvm::Function::Create(ThunkTy, llvm::Function::ExternalLinkage,
ThunkName.str(), &CGM.getModule());
assert(ThunkFn->getName() == ThunkName && "name was uniqued!");
ThunkFn->setLinkage(MD->isExternallyVisible()
? llvm::GlobalValue::LinkOnceODRLinkage
: llvm::GlobalValue::InternalLinkage);
if (MD->isExternallyVisible())
ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn);
CGM.SetLLVMFunctionAttributesForDefinition(MD, ThunkFn);
// Add the "thunk" attribute so that LLVM knows that the return type is
// meaningless. These thunks can be used to call functions with differing
// return types, and the caller is required to cast the prototype
// appropriately to extract the correct value.
ThunkFn->addFnAttr("thunk");
// These thunks can be compared, so they are not unnamed.
ThunkFn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
// Start codegen.
CodeGenFunction CGF(CGM);
CGF.CurGD = GlobalDecl(MD);
CGF.CurFuncIsThunk = true;
// Build FunctionArgs, but only include the implicit 'this' parameter
// declaration.
FunctionArgList FunctionArgs;
buildThisParam(CGF, FunctionArgs);
// Start defining the function.
CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
FunctionArgs, MD->getLocation(), SourceLocation());
EmitThisParam(CGF);
// Load the vfptr and then callee from the vftable. The callee should have
// adjusted 'this' so that the vfptr is at offset zero.
llvm::Value *VTable = CGF.GetVTablePtr(
getThisAddress(CGF), ThunkTy->getPointerTo()->getPointerTo(), MD->getParent());
llvm::Value *VFuncPtr =
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
llvm::Value *Callee =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
CGF.EmitMustTailThunk(MD, getThisValue(CGF), Callee);
return ThunkFn;
}
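// Editorial sketch (hypothetical type): what the thunk built above stands in
// for. Taking the address of a virtual member function yields a pointer to
// the thunk rather than to any particular override:
//   struct S { virtual int f(); };
//   auto pmf = &S::f;   // pmf holds the thunk; (s->*pmf)() loads s's vfptr,
//                       // indexes slot ML.Index, and musttail-calls through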
void MicrosoftCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
const std::unique_ptr<VPtrInfo>& VBT = (*VBGlobals.VBTables)[I];
llvm::GlobalVariable *GV = VBGlobals.Globals[I];
if (GV->isDeclaration())
emitVBTableDefinition(*VBT, RD, GV);
}
}
llvm::GlobalVariable *
MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
llvm::GlobalVariable::LinkageTypes Linkage) {
SmallString<256> OutName;
llvm::raw_svector_ostream Out(OutName);
getMangleContext().mangleCXXVBTable(RD, VBT.MangledPath, Out);
StringRef Name = OutName.str();
llvm::ArrayType *VBTableType =
llvm::ArrayType::get(CGM.IntTy, 1 + VBT.ObjectWithVPtr->getNumVBases());
assert(!CGM.getModule().getNamedGlobal(Name) &&
"vbtable with this name already exists: mangling bug?");
llvm::GlobalVariable *GV =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
else if (RD->hasAttr<DLLExportAttr>())
GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
if (!GV->hasExternalLinkage())
emitVBTableDefinition(VBT, RD, GV);
return GV;
}
void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT,
const CXXRecordDecl *RD,
llvm::GlobalVariable *GV) const {
const CXXRecordDecl *ObjectWithVPtr = VBT.ObjectWithVPtr;
assert(RD->getNumVBases() && ObjectWithVPtr->getNumVBases() &&
"should only emit vbtables for classes with vbtables");
const ASTRecordLayout &BaseLayout =
getContext().getASTRecordLayout(VBT.IntroducingObject);
const ASTRecordLayout &DerivedLayout = getContext().getASTRecordLayout(RD);
SmallVector<llvm::Constant *, 4> Offsets(1 + ObjectWithVPtr->getNumVBases(),
nullptr);
// The offset from ObjectWithVPtr's vbptr to itself always leads.
CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset();
Offsets[0] = llvm::ConstantInt::get(CGM.IntTy, -VBPtrOffset.getQuantity());
MicrosoftVTableContext &Context = CGM.getMicrosoftVTableContext();
for (const auto &I : ObjectWithVPtr->vbases()) {
const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl();
CharUnits Offset = DerivedLayout.getVBaseClassOffset(VBase);
assert(!Offset.isNegative());
// Make it relative to the subobject vbptr.
CharUnits CompleteVBPtrOffset = VBT.NonVirtualOffset + VBPtrOffset;
if (VBT.getVBaseWithVPtr())
CompleteVBPtrOffset +=
DerivedLayout.getVBaseClassOffset(VBT.getVBaseWithVPtr());
Offset -= CompleteVBPtrOffset;
unsigned VBIndex = Context.getVBTableIndex(ObjectWithVPtr, VBase);
assert(Offsets[VBIndex] == nullptr && "The same vbindex seen twice?");
Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity());
}
assert(Offsets.size() ==
cast<llvm::ArrayType>(cast<llvm::PointerType>(GV->getType())
->getElementType())->getNumElements());
llvm::ArrayType *VBTableType =
llvm::ArrayType::get(CGM.IntTy, Offsets.size());
llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets);
GV->setInitializer(Init);
if (RD->hasAttr<DLLImportAttr>())
GV->setLinkage(llvm::GlobalVariable::AvailableExternallyLinkage);
}
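// Editorial sketch (hypothetical layout): the i32 array emitted above for
//   struct A { int a; }; struct B { int b; };
//   struct C : virtual A, virtual B { int c; };
// is roughly { 0, offset-of-A-in-C - vbptr_offset,
//              offset-of-B-in-C - vbptr_offset }: entry 0 locates the vbptr
// relative to itself, and each later slot is a vbase's displacement from it.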
llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF,
Address This,
const ThisAdjustment &TA) {
if (TA.isEmpty())
return This.getPointer();
This = CGF.Builder.CreateElementBitCast(This, CGF.Int8Ty);
llvm::Value *V;
if (TA.Virtual.isEmpty()) {
V = This.getPointer();
} else {
assert(TA.Virtual.Microsoft.VtordispOffset < 0);
// Adjust the this argument based on the vtordisp value.
Address VtorDispPtr =
CGF.Builder.CreateConstInBoundsByteGEP(This,
CharUnits::fromQuantity(TA.Virtual.Microsoft.VtordispOffset));
VtorDispPtr = CGF.Builder.CreateElementBitCast(VtorDispPtr, CGF.Int32Ty);
llvm::Value *VtorDisp = CGF.Builder.CreateLoad(VtorDispPtr, "vtordisp");
V = CGF.Builder.CreateGEP(This.getPointer(),
CGF.Builder.CreateNeg(VtorDisp));
// Unfortunately, having applied the vtordisp means that we no
// longer really have a known alignment for the vbptr step.
// We'll assume the vbptr is pointer-aligned.
if (TA.Virtual.Microsoft.VBPtrOffset) {
// If the final overrider is defined in a virtual base other than the one
// that holds the vfptr, we have to use a vtordispex thunk which looks up
// the vbtable of the derived class.
assert(TA.Virtual.Microsoft.VBPtrOffset > 0);
assert(TA.Virtual.Microsoft.VBOffsetOffset >= 0);
llvm::Value *VBPtr;
llvm::Value *VBaseOffset =
GetVBaseOffsetFromVBPtr(CGF, Address(V, CGF.getPointerAlign()),
-TA.Virtual.Microsoft.VBPtrOffset,
TA.Virtual.Microsoft.VBOffsetOffset, &VBPtr);
V = CGF.Builder.CreateInBoundsGEP(VBPtr, VBaseOffset);
}
}
if (TA.NonVirtual) {
// Non-virtual adjustment might result in a pointer outside the allocated
// object, e.g. if the final overrider class is laid out after the virtual
// base that declares a method in the most derived class.
V = CGF.Builder.CreateConstGEP1_32(V, TA.NonVirtual);
}
// Don't need to bitcast back; the call CodeGen will handle this.
return V;
}
llvm::Value *
MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
const ReturnAdjustment &RA) {
if (RA.isEmpty())
return Ret.getPointer();
auto OrigTy = Ret.getType();
Ret = CGF.Builder.CreateElementBitCast(Ret, CGF.Int8Ty);
llvm::Value *V = Ret.getPointer();
if (RA.Virtual.Microsoft.VBIndex) {
assert(RA.Virtual.Microsoft.VBIndex > 0);
int32_t IntSize = CGF.getIntSize().getQuantity();
llvm::Value *VBPtr;
llvm::Value *VBaseOffset =
GetVBaseOffsetFromVBPtr(CGF, Ret, RA.Virtual.Microsoft.VBPtrOffset,
IntSize * RA.Virtual.Microsoft.VBIndex, &VBPtr);
V = CGF.Builder.CreateInBoundsGEP(VBPtr, VBaseOffset);
}
if (RA.NonVirtual)
V = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, V, RA.NonVirtual);
// Cast back to the original type.
return CGF.Builder.CreateBitCast(V, OrigTy);
}
bool MicrosoftCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
QualType elementType) {
// Microsoft seems to completely ignore the possibility of a
// two-argument usual deallocation function.
return elementType.isDestructedType();
}
bool MicrosoftCXXABI::requiresArrayCookie(const CXXNewExpr *expr) {
// Microsoft seems to completely ignore the possibility of a
// two-argument usual deallocation function.
return expr->getAllocatedType().isDestructedType();
}
CharUnits MicrosoftCXXABI::getArrayCookieSizeImpl(QualType type) {
// The array cookie is always a size_t; we then pad that out to the
// alignment of the element type.
ASTContext &Ctx = getContext();
return std::max(Ctx.getTypeSizeInChars(Ctx.getSizeType()),
Ctx.getTypeAlignInChars(type));
}
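// Editorial sketch: the max() above on a 64-bit target (sizeof(size_t) == 8).
//   struct T { ~T(); char c; };              // cookie = 8 bytes
//   struct alignas(16) U { ~U(); char c; };  // cookie = 16 bytes, padded out
//                                            // to the element alignment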
llvm::Value *MicrosoftCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
Address allocPtr,
CharUnits cookieSize) {
Address numElementsPtr =
CGF.Builder.CreateElementBitCast(allocPtr, CGF.SizeTy);
return CGF.Builder.CreateLoad(numElementsPtr);
}
Address MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
Address newPtr,
llvm::Value *numElements,
const CXXNewExpr *expr,
QualType elementType) {
assert(requiresArrayCookie(expr));
// The size of the cookie.
CharUnits cookieSize = getArrayCookieSizeImpl(elementType);
// Compute an offset to the cookie.
Address cookiePtr = newPtr;
// Write the number of elements into the appropriate slot.
Address numElementsPtr
= CGF.Builder.CreateElementBitCast(cookiePtr, CGF.SizeTy);
CGF.Builder.CreateStore(numElements, numElementsPtr);
// Finally, compute a pointer to the actual data buffer by skipping
// over the cookie completely.
return CGF.Builder.CreateConstInBoundsByteGEP(newPtr, cookieSize);
}
static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
llvm::Constant *Dtor,
llvm::Constant *Addr) {
// Create a function which calls the destructor.
llvm::Constant *DtorStub = CGF.createAtExitStub(VD, Dtor, Addr);
// extern "C" int __tlregdtor(void (*f)(void));
llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get(
CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false);
llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction(
TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true);
if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor))
TLRegDtorFn->setDoesNotThrow();
CGF.EmitNounwindRuntimeCall(TLRegDtor, DtorStub);
}
void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Dtor,
llvm::Constant *Addr) {
if (D.getTLSKind())
return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr);
// The default behavior is to use atexit.
CGF.registerGlobalDtorWithAtExit(D, Dtor, Addr);
}
void MicrosoftCXXABI::EmitThreadLocalInitFuncs(
CodeGenModule &CGM, ArrayRef<const VarDecl *> CXXThreadLocals,
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) {
if (CXXThreadLocalInits.empty())
return;
CGM.AppendLinkerOptions(CGM.getTarget().getTriple().getArch() ==
llvm::Triple::x86
? "/include:___dyn_tls_init@12"
: "/include:__dyn_tls_init");
// This will create a GV in the .CRT$XDU section. It will point to our
// initialization function. The CRT will call all of these function
// pointers at start-up time and, eventually, at thread-creation time.
auto AddToXDU = [&CGM](llvm::Function *InitFunc) {
llvm::GlobalVariable *InitFuncPtr = new llvm::GlobalVariable(
CGM.getModule(), InitFunc->getType(), /*IsConstant=*/true,
llvm::GlobalVariable::InternalLinkage, InitFunc,
Twine(InitFunc->getName(), "$initializer$"));
InitFuncPtr->setSection(".CRT$XDU");
// This variable has discardable linkage; we have to add it to @llvm.used to
// ensure it won't get discarded.
CGM.addUsedGlobal(InitFuncPtr);
return InitFuncPtr;
};
std::vector<llvm::Function *> NonComdatInits;
for (size_t I = 0, E = CXXThreadLocalInitVars.size(); I != E; ++I) {
llvm::GlobalVariable *GV = cast<llvm::GlobalVariable>(
CGM.GetGlobalValue(CGM.getMangledName(CXXThreadLocalInitVars[I])));
llvm::Function *F = CXXThreadLocalInits[I];
// If the GV is already in a comdat group, then we have to join it.
if (llvm::Comdat *C = GV->getComdat())
AddToXDU(F)->setComdat(C);
else
NonComdatInits.push_back(F);
}
if (!NonComdatInits.empty()) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
llvm::Function *InitFunc = CGM.CreateGlobalInitOrDestructFunction(
FTy, "__tls_init", CGM.getTypes().arrangeNullaryFunction(),
SourceLocation(), /*TLS=*/true);
CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, NonComdatInits);
AddToXDU(InitFunc);
}
}
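// Illustrative IR produced by AddToXDU above (a hedged sketch; the exact
// @llvm.used shape is approximate):
//   @"__tls_init$initializer$" = internal constant void ()* @__tls_init,
//       section ".CRT$XDU"
//   @llvm.used = appending global [1 x i8*]
//       [i8* bitcast (void ()** @"__tls_init$initializer$" to i8*)]
// The CRT walks the .CRT$XD* function-pointer sections at start-up and at
// thread creation and calls each entry.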
LValue MicrosoftCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD,
QualType LValType) {
CGF.CGM.ErrorUnsupported(VD, "thread wrappers");
return LValue();
}
static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) {
StringRef VarName("_Init_thread_epoch");
CharUnits Align = CGM.getIntAlign();
if (auto *GV = CGM.getModule().getNamedGlobal(VarName))
return ConstantAddress(GV, Align);
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), CGM.IntTy,
/*Constant=*/false, llvm::GlobalVariable::ExternalLinkage,
/*Initializer=*/nullptr, VarName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel);
GV->setAlignment(Align.getQuantity());
return ConstantAddress(GV, Align);
}
static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_header",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind),
/*Local=*/true);
}
static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_footer",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind),
/*Local=*/true);
}
static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) {
llvm::FunctionType *FTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(
FTy, "_Init_thread_abort",
llvm::AttributeList::get(CGM.getLLVMContext(),
llvm::AttributeList::FunctionIndex,
llvm::Attribute::NoUnwind),
/*Local=*/true);
}
namespace {
struct ResetGuardBit final : EHScopeStack::Cleanup {
Address Guard;
unsigned GuardNum;
ResetGuardBit(Address Guard, unsigned GuardNum)
: Guard(Guard), GuardNum(GuardNum) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
// Reset the bit in the mask so that the static variable may be
// reinitialized.
CGBuilderTy &Builder = CGF.Builder;
llvm::LoadInst *LI = Builder.CreateLoad(Guard);
llvm::ConstantInt *Mask =
llvm::ConstantInt::get(CGF.IntTy, ~(1ULL << GuardNum));
Builder.CreateStore(Builder.CreateAnd(LI, Mask), Guard);
}
};
struct CallInitThreadAbort final : EHScopeStack::Cleanup {
llvm::Value *Guard;
CallInitThreadAbort(Address Guard) : Guard(Guard.getPointer()) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
// Calling _Init_thread_abort will reset the guard's state.
CGF.EmitNounwindRuntimeCall(getInitThreadAbortFn(CGF.CGM), Guard);
}
};
}
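// Illustrative source that exercises both cleanups above (hedged; may_throw
// is hypothetical):
//   struct T { T() { may_throw(); } };
//   void f() { static T t; }
// If T::T() throws, the guard bit (or the _Init_thread state) is rolled back
// so a later call to f can retry the initialization.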
void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
llvm::GlobalVariable *GV,
bool PerformInit) {
// MSVC only uses guards for static locals.
if (!D.isStaticLocal()) {
assert(GV->hasWeakLinkage() || GV->hasLinkOnceLinkage());
// GlobalOpt is allowed to discard the initializer, so use linkonce_odr.
llvm::Function *F = CGF.CurFn;
F->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
F->setComdat(CGM.getModule().getOrInsertComdat(F->getName()));
CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
return;
}
bool ThreadlocalStatic = D.getTLSKind();
bool ThreadsafeStatic = getContext().getLangOpts().ThreadsafeStatics;
// Thread-safe static variables which aren't thread-specific have a
// per-variable guard.
bool HasPerVariableGuard = ThreadsafeStatic && !ThreadlocalStatic;
CGBuilderTy &Builder = CGF.Builder;
llvm::IntegerType *GuardTy = CGF.Int32Ty;
llvm::ConstantInt *Zero = llvm::ConstantInt::get(GuardTy, 0);
CharUnits GuardAlign = CharUnits::fromQuantity(4);
// Get the guard variable for this function if we have one already.
GuardInfo *GI = nullptr;
if (ThreadlocalStatic)
GI = &ThreadLocalGuardVariableMap[D.getDeclContext()];
else if (!ThreadsafeStatic)
GI = &GuardVariableMap[D.getDeclContext()];
llvm::GlobalVariable *GuardVar = GI ? GI->Guard : nullptr;
unsigned GuardNum;
if (D.isExternallyVisible()) {
// Externally visible variables have to be numbered in Sema to properly
// handle unreachable VarDecls.
GuardNum = getContext().getStaticLocalNumber(&D);
assert(GuardNum > 0);
GuardNum--;
} else if (HasPerVariableGuard) {
GuardNum = ThreadSafeGuardNumMap[D.getDeclContext()]++;
} else {
// Non-externally visible variables are numbered here in CodeGen.
GuardNum = GI->BitIndex++;
}
if (!HasPerVariableGuard && GuardNum >= 32) {
if (D.isExternallyVisible())
ErrorUnsupportedABI(CGF, "more than 32 guarded initializations");
GuardNum %= 32;
GuardVar = nullptr;
}
if (!GuardVar) {
// Mangle the name for the guard.
SmallString<256> GuardName;
{
llvm::raw_svector_ostream Out(GuardName);
if (HasPerVariableGuard)
getMangleContext().mangleThreadSafeStaticGuardVariable(&D, GuardNum,
Out);
else
getMangleContext().mangleStaticGuardVariable(&D, Out);
}
// Create the guard variable with a zero-initializer. Just absorb linkage,
// visibility and dll storage class from the guarded variable.
GuardVar =
new llvm::GlobalVariable(CGM.getModule(), GuardTy, /*isConstant=*/false,
GV->getLinkage(), Zero, GuardName.str());
GuardVar->setVisibility(GV->getVisibility());
GuardVar->setDLLStorageClass(GV->getDLLStorageClass());
GuardVar->setAlignment(GuardAlign.getQuantity());
if (GuardVar->isWeakForLinker())
GuardVar->setComdat(
CGM.getModule().getOrInsertComdat(GuardVar->getName()));
if (D.getTLSKind())
GuardVar->setThreadLocal(true);
if (GI && !HasPerVariableGuard)
GI->Guard = GuardVar;
}
ConstantAddress GuardAddr(GuardVar, GuardAlign);
assert(GuardVar->getLinkage() == GV->getLinkage() &&
"static local from the same function had different linkage");
if (!HasPerVariableGuard) {
// Pseudo code for the test:
// if (!(GuardVar & MyGuardBit)) {
// GuardVar |= MyGuardBit;
// ... initialize the object ...;
// }
// Test our bit from the guard variable.
llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum);
llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr);
llvm::Value *IsInitialized =
Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock);
// Set our bit in the guard variable, emit the initializer, and add a global
// destructor if appropriate.
CGF.EmitBlock(InitBlock);
Builder.CreateStore(Builder.CreateOr(LI, Bit), GuardAddr);
CGF.EHStack.pushCleanup<ResetGuardBit>(EHCleanup, GuardAddr, GuardNum);
CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
CGF.PopCleanupBlock();
Builder.CreateBr(EndBlock);
// Continue.
CGF.EmitBlock(EndBlock);
} else {
// Pseudo code for the test:
// if (TSS > _Init_thread_epoch) {
// _Init_thread_header(&TSS);
// if (TSS == -1) {
// ... initialize the object ...;
// _Init_thread_footer(&TSS);
// }
// }
//
// The algorithm is almost identical to the one in the appendix of N2325.
// This BasicBlock determines whether or not we have any work to do.
llvm::LoadInst *FirstGuardLoad = Builder.CreateLoad(GuardAddr);
FirstGuardLoad->setOrdering(llvm::AtomicOrdering::Unordered);
llvm::LoadInst *InitThreadEpoch =
Builder.CreateLoad(getInitThreadEpochPtr(CGM));
llvm::Value *IsUninitialized =
Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch);
llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt");
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock);
// This BasicBlock attempts to determine whether or not this thread is
// responsible for doing the initialization.
CGF.EmitBlock(AttemptInitBlock);
CGF.EmitNounwindRuntimeCall(getInitThreadHeaderFn(CGM),
GuardAddr.getPointer());
llvm::LoadInst *SecondGuardLoad = Builder.CreateLoad(GuardAddr);
SecondGuardLoad->setOrdering(llvm::AtomicOrdering::Unordered);
llvm::Value *ShouldDoInit =
Builder.CreateICmpEQ(SecondGuardLoad, getAllOnesInt());
llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
Builder.CreateCondBr(ShouldDoInit, InitBlock, EndBlock);
// Ok, we ended up getting selected as the initializing thread.
CGF.EmitBlock(InitBlock);
CGF.EHStack.pushCleanup<CallInitThreadAbort>(EHCleanup, GuardAddr);
CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
CGF.PopCleanupBlock();
CGF.EmitNounwindRuntimeCall(getInitThreadFooterFn(CGM),
GuardAddr.getPointer());
Builder.CreateBr(EndBlock);
CGF.EmitBlock(EndBlock);
}
}
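// Illustrative consequence of the shared 32-bit mask (hedged; g is
// hypothetical):
//   void f() { static int a = g(); static int b = g(); }
// With thread-safe statics disabled, `a` tests and sets bit 0 and `b` bit 1
// of a single guard word for f; a 33rd guarded local starts a fresh guard
// word if internal, and is a hard error if externally visible.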
bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
// Null-ness for function memptrs only depends on the first field, which is
// the function pointer. The rest don't matter, so we can zero initialize.
if (MPT->isMemberFunctionPointer())
return true;
// The virtual base adjustment field is always -1 for null, so if we have one
// we can't zero initialize. The field offset is sometimes also -1 if 0 is a
// valid field offset.
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
return (!MSInheritanceAttr::hasVBTableOffsetField(Inheritance) &&
RD->nullFieldOffsetIsZero());
}
llvm::Type *
MicrosoftCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
llvm::SmallVector<llvm::Type *, 4> fields;
if (MPT->isMemberFunctionPointer())
fields.push_back(CGM.VoidPtrTy); // FunctionPointerOrVirtualThunk
else
fields.push_back(CGM.IntTy); // FieldOffset
if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(),
Inheritance))
fields.push_back(CGM.IntTy);
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
fields.push_back(CGM.IntTy);
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(CGM.IntTy); // VirtualBaseAdjustmentOffset
if (fields.size() == 1)
return fields[0];
return llvm::StructType::get(CGM.getLLVMContext(), fields);
}
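// Illustrative lowerings under the different inheritance models (a hedged
// sketch; exact widths follow CGM.IntTy):
//   struct S { void f(); };           // single:   void (S::*)() -> i8*
//   struct A {};  struct B {};
//   struct M : A, B { void f(); };    // multiple: void (M::*)() -> { i8*, i32 }
//   struct V : virtual A { int x; };  // virtual:  int V::*      -> { i32, i32 }
// A member function pointer in the unspecified model carries all four fields.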
void MicrosoftCXXABI::
GetNullMemberPointerFields(const MemberPointerType *MPT,
llvm::SmallVectorImpl<llvm::Constant *> &fields) {
assert(fields.empty());
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
if (MPT->isMemberFunctionPointer()) {
// FunctionPointerOrVirtualThunk
fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy));
} else {
if (RD->nullFieldOffsetIsZero())
fields.push_back(getZeroInt()); // FieldOffset
else
fields.push_back(getAllOnesInt()); // FieldOffset
}
if (MSInheritanceAttr::hasNVOffsetField(MPT->isMemberFunctionPointer(),
Inheritance))
fields.push_back(getZeroInt());
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
fields.push_back(getZeroInt());
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(getAllOnesInt());
}
llvm::Constant *
MicrosoftCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
llvm::SmallVector<llvm::Constant *, 4> fields;
GetNullMemberPointerFields(MPT, fields);
if (fields.size() == 1)
return fields[0];
llvm::Constant *Res = llvm::ConstantStruct::getAnon(fields);
assert(Res->getType() == ConvertMemberPointerType(MPT));
return Res;
}
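// Illustrative null encodings implied by the fields above (hedged):
//   struct A { int x; };   // int A::* : null is -1; x really lives at offset 0
//   struct P { virtual void f(); int x; };
//                          // int P::* : null can be 0; offset 0 holds the
//                          // vfptr, never a field
// Member function pointers are always null-tested via a null first field.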
llvm::Constant *
MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField,
bool IsMemberFunction,
const CXXRecordDecl *RD,
CharUnits NonVirtualBaseAdjustment,
unsigned VBTableIndex) {
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Single-inheritance class member pointers are represented as scalars instead
// of aggregates.
if (MSInheritanceAttr::hasOnlyOneField(IsMemberFunction, Inheritance))
return FirstField;
llvm::SmallVector<llvm::Constant *, 4> fields;
fields.push_back(FirstField);
if (MSInheritanceAttr::hasNVOffsetField(IsMemberFunction, Inheritance))
fields.push_back(llvm::ConstantInt::get(
CGM.IntTy, NonVirtualBaseAdjustment.getQuantity()));
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) {
CharUnits Offs = CharUnits::Zero();
if (VBTableIndex)
Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
fields.push_back(llvm::ConstantInt::get(CGM.IntTy, Offs.getQuantity()));
}
// The rest of the fields are adjusted by conversions to a more derived class.
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
fields.push_back(llvm::ConstantInt::get(CGM.IntTy, VBTableIndex));
return llvm::ConstantStruct::getAnon(fields);
}
llvm::Constant *
MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
CharUnits offset) {
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
if (RD->getMSInheritanceModel() ==
MSInheritanceAttr::Keyword_virtual_inheritance)
offset -= getContext().getOffsetOfBaseWithVBPtr(RD);
llvm::Constant *FirstField =
llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity());
return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/false, RD,
CharUnits::Zero(), /*VBTableIndex=*/0);
}
llvm::Constant *MicrosoftCXXABI::EmitMemberPointer(const APValue &MP,
QualType MPType) {
const MemberPointerType *DstTy = MPType->castAs<MemberPointerType>();
const ValueDecl *MPD = MP.getMemberPointerDecl();
if (!MPD)
return EmitNullMemberPointer(DstTy);
ASTContext &Ctx = getContext();
ArrayRef<const CXXRecordDecl *> MemberPointerPath = MP.getMemberPointerPath();
llvm::Constant *C;
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD)) {
C = EmitMemberFunctionPointer(MD);
} else {
CharUnits FieldOffset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(MPD));
C = EmitMemberDataPointer(DstTy, FieldOffset);
}
if (!MemberPointerPath.empty()) {
const CXXRecordDecl *SrcRD = cast<CXXRecordDecl>(MPD->getDeclContext());
const Type *SrcRecTy = Ctx.getTypeDeclType(SrcRD).getTypePtr();
const MemberPointerType *SrcTy =
Ctx.getMemberPointerType(DstTy->getPointeeType(), SrcRecTy)
->castAs<MemberPointerType>();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
SmallVector<const CXXBaseSpecifier *, 4> DerivedToBasePath;
const CXXRecordDecl *PrevRD = SrcRD;
for (const CXXRecordDecl *PathElem : MemberPointerPath) {
const CXXRecordDecl *Base = nullptr;
const CXXRecordDecl *Derived = nullptr;
if (DerivedMember) {
Base = PathElem;
Derived = PrevRD;
} else {
Base = PrevRD;
Derived = PathElem;
}
for (const CXXBaseSpecifier &BS : Derived->bases())
if (BS.getType()->getAsCXXRecordDecl()->getCanonicalDecl() ==
Base->getCanonicalDecl())
DerivedToBasePath.push_back(&BS);
PrevRD = PathElem;
}
assert(DerivedToBasePath.size() == MemberPointerPath.size());
CastKind CK = DerivedMember ? CK_DerivedToBaseMemberPointer
: CK_BaseToDerivedMemberPointer;
C = EmitMemberPointerConversion(SrcTy, DstTy, CK, DerivedToBasePath.begin(),
DerivedToBasePath.end(), C);
}
return C;
}
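// Illustrative path recovery (hedged): for
//   struct B { int x; };  struct D : B {};
//   constexpr int D::*p = &B::x;
// the APValue's MemberPointerPath records the derivation step from B to D;
// the loop above rediscovers the matching CXXBaseSpecifier so the constant
// can be converted with CK_BaseToDerivedMemberPointer.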
llvm::Constant *
MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
assert(MD->isInstance() && "Member function must not be static!");
MD = MD->getCanonicalDecl();
CharUnits NonVirtualBaseAdjustment = CharUnits::Zero();
const CXXRecordDecl *RD = MD->getParent()->getMostRecentDecl();
CodeGenTypes &Types = CGM.getTypes();
unsigned VBTableIndex = 0;
llvm::Constant *FirstField;
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (!MD->isVirtual()) {
llvm::Type *Ty;
// Check whether the function has a computable LLVM signature.
if (Types.isFuncTypeConvertible(FPT)) {
// The function has a computable LLVM signature; use the correct type.
Ty = Types.GetFunctionType(Types.arrangeCXXMethodDeclaration(MD));
} else {
// Use an arbitrary non-function type to tell GetAddrOfFunction that the
// function type is incomplete.
Ty = CGM.PtrDiffTy;
}
FirstField = CGM.GetAddrOfFunction(MD, Ty);
} else {
auto &VTableContext = CGM.getMicrosoftVTableContext();
MicrosoftVTableContext::MethodVFTableLocation ML =
VTableContext.getMethodVFTableLocation(MD);
FirstField = EmitVirtualMemPtrThunk(MD, ML);
// Include the vfptr adjustment if the method is in a non-primary vftable.
NonVirtualBaseAdjustment += ML.VFPtrOffset;
if (ML.VBase)
VBTableIndex = VTableContext.getVBTableIndex(RD, ML.VBase) * 4;
}
if (VBTableIndex == 0 &&
RD->getMSInheritanceModel() ==
MSInheritanceAttr::Keyword_virtual_inheritance)
NonVirtualBaseAdjustment -= getContext().getOffsetOfBaseWithVBPtr(RD);
// The rest of the fields are common with data member pointers.
FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/true, RD,
NonVirtualBaseAdjustment, VBTableIndex);
}
/// Member pointers are the same if they're either bitwise identical *or* both
/// null. Null-ness for function members is determined by the first field,
/// while for data member pointers we must compare all fields.
llvm::Value *
MicrosoftCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF,
llvm::Value *L,
llvm::Value *R,
const MemberPointerType *MPT,
bool Inequality) {
CGBuilderTy &Builder = CGF.Builder;
// Handle != comparisons by switching the sense of all boolean operations.
llvm::ICmpInst::Predicate Eq;
llvm::Instruction::BinaryOps And, Or;
if (Inequality) {
Eq = llvm::ICmpInst::ICMP_NE;
And = llvm::Instruction::Or;
Or = llvm::Instruction::And;
} else {
Eq = llvm::ICmpInst::ICMP_EQ;
And = llvm::Instruction::And;
Or = llvm::Instruction::Or;
}
// If this is a single field member pointer (single inheritance), this is a
// single icmp.
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
if (MSInheritanceAttr::hasOnlyOneField(MPT->isMemberFunctionPointer(),
Inheritance))
return Builder.CreateICmp(Eq, L, R);
// Compare the first field.
llvm::Value *L0 = Builder.CreateExtractValue(L, 0, "lhs.0");
llvm::Value *R0 = Builder.CreateExtractValue(R, 0, "rhs.0");
llvm::Value *Cmp0 = Builder.CreateICmp(Eq, L0, R0, "memptr.cmp.first");
// Compare everything other than the first field.
llvm::Value *Res = nullptr;
llvm::StructType *LType = cast<llvm::StructType>(L->getType());
for (unsigned I = 1, E = LType->getNumElements(); I != E; ++I) {
llvm::Value *LF = Builder.CreateExtractValue(L, I);
llvm::Value *RF = Builder.CreateExtractValue(R, I);
llvm::Value *Cmp = Builder.CreateICmp(Eq, LF, RF, "memptr.cmp.rest");
if (Res)
Res = Builder.CreateBinOp(And, Res, Cmp);
else
Res = Cmp;
}
// Check if the first field is 0 if this is a function pointer.
if (MPT->isMemberFunctionPointer()) {
// (l1 == r1 && ...) || l0 == 0
llvm::Value *Zero = llvm::Constant::getNullValue(L0->getType());
llvm::Value *IsZero = Builder.CreateICmp(Eq, L0, Zero, "memptr.cmp.iszero");
Res = Builder.CreateBinOp(Or, Res, IsZero);
}
// Combine with the comparison of the first field, which must always be true
// for the overall comparison to succeed.
return Builder.CreateBinOp(And, Res, Cmp0, "memptr.cmp");
}
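// Net effect of the combination above, written out (a hedged reading):
//   equal(L, R) = (l0 == r0) && (rest_equal || l0 == 0)   // function memptrs
//   equal(L, R) = (l0 == r0) && rest_equal                // data memptrs
// When both first fields are null, the remaining (possibly garbage) fields of
// a function memptr are ignored; for != every predicate and connective flips.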
llvm::Value *
MicrosoftCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF,
llvm::Value *MemPtr,
const MemberPointerType *MPT) {
CGBuilderTy &Builder = CGF.Builder;
llvm::SmallVector<llvm::Constant *, 4> fields;
// We only need one field for member functions.
if (MPT->isMemberFunctionPointer())
fields.push_back(llvm::Constant::getNullValue(CGM.VoidPtrTy));
else
GetNullMemberPointerFields(MPT, fields);
assert(!fields.empty());
llvm::Value *FirstField = MemPtr;
if (MemPtr->getType()->isStructTy())
FirstField = Builder.CreateExtractValue(MemPtr, 0);
llvm::Value *Res = Builder.CreateICmpNE(FirstField, fields[0], "memptr.cmp0");
// For function member pointers, we only need to test the function pointer
// field. The other fields, if any, can be garbage.
if (MPT->isMemberFunctionPointer())
return Res;
// Otherwise, emit a series of compares and combine the results.
for (int I = 1, E = fields.size(); I < E; ++I) {
llvm::Value *Field = Builder.CreateExtractValue(MemPtr, I);
llvm::Value *Next = Builder.CreateICmpNE(Field, fields[I], "memptr.cmp");
Res = Builder.CreateOr(Res, Next, "memptr.tobool");
}
return Res;
}
bool MicrosoftCXXABI::MemberPointerConstantIsNull(const MemberPointerType *MPT,
llvm::Constant *Val) {
// Function pointers are null if the pointer in the first field is null.
if (MPT->isMemberFunctionPointer()) {
llvm::Constant *FirstField = Val->getType()->isStructTy() ?
Val->getAggregateElement(0U) : Val;
return FirstField->isNullValue();
}
// If it's not a function pointer and it's zero initializable, we can easily
// check zero.
if (isZeroInitializable(MPT) && Val->isNullValue())
return true;
// Otherwise, break down all the fields for comparison. Hopefully these
// little Constants are reused, while a big null struct might not be.
llvm::SmallVector<llvm::Constant *, 4> Fields;
GetNullMemberPointerFields(MPT, Fields);
if (Fields.size() == 1) {
assert(Val->getType()->isIntegerTy());
return Val == Fields[0];
}
unsigned I, E;
for (I = 0, E = Fields.size(); I != E; ++I) {
if (Val->getAggregateElement(I) != Fields[I])
break;
}
return I == E;
}
llvm::Value *
MicrosoftCXXABI::GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
Address This,
llvm::Value *VBPtrOffset,
llvm::Value *VBTableOffset,
llvm::Value **VBPtrOut) {
CGBuilderTy &Builder = CGF.Builder;
// Load the vbtable pointer from the vbptr in the instance.
This = Builder.CreateElementBitCast(This, CGM.Int8Ty);
llvm::Value *VBPtr =
Builder.CreateInBoundsGEP(This.getPointer(), VBPtrOffset, "vbptr");
if (VBPtrOut) *VBPtrOut = VBPtr;
VBPtr = Builder.CreateBitCast(VBPtr,
CGM.Int32Ty->getPointerTo(0)->getPointerTo(This.getAddressSpace()));
CharUnits VBPtrAlign;
if (auto CI = dyn_cast<llvm::ConstantInt>(VBPtrOffset)) {
VBPtrAlign = This.getAlignment().alignmentAtOffset(
CharUnits::fromQuantity(CI->getSExtValue()));
} else {
VBPtrAlign = CGF.getPointerAlign();
}
llvm::Value *VBTable = Builder.CreateAlignedLoad(VBPtr, VBPtrAlign, "vbtable");
// Translate from byte offset to table index. It improves analyzability.
llvm::Value *VBTableIndex = Builder.CreateAShr(
VBTableOffset, llvm::ConstantInt::get(VBTableOffset->getType(), 2),
"vbtindex", /*isExact=*/true);
// Load an i32 offset from the vb-table.
llvm::Value *VBaseOffs = Builder.CreateInBoundsGEP(VBTable, VBTableIndex);
VBaseOffs = Builder.CreateBitCast(VBaseOffs, CGM.Int32Ty->getPointerTo(0));
return Builder.CreateAlignedLoad(VBaseOffs, CharUnits::fromQuantity(4),
"vbase_offs");
}
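// Conceptual C model of the load sequence above (a hedged sketch, not the
// emitted IR):
//   char *vbptr      = (char *)this_ + VBPtrOffset;   // locate the vbptr
//   int  *vbtable    = *(int **)vbptr;                // load the vbtable ptr
//   int   vbase_offs = vbtable[VBTableOffset / 4];    // index i32 entries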
// Returns an adjusted base cast to i8*, since we do more address arithmetic on
// it.
llvm::Value *MicrosoftCXXABI::AdjustVirtualBase(
CodeGenFunction &CGF, const Expr *E, const CXXRecordDecl *RD,
Address Base, llvm::Value *VBTableOffset, llvm::Value *VBPtrOffset) {
CGBuilderTy &Builder = CGF.Builder;
Base = Builder.CreateElementBitCast(Base, CGM.Int8Ty);
llvm::BasicBlock *OriginalBB = nullptr;
llvm::BasicBlock *SkipAdjustBB = nullptr;
llvm::BasicBlock *VBaseAdjustBB = nullptr;
// In the unspecified inheritance model, there might not be a vbtable at all,
// in which case we need to skip the virtual base lookup. If there is a
// vbtable, the first entry is a no-op entry that gives back the original
// base, so look for a virtual base adjustment offset of zero.
if (VBPtrOffset) {
OriginalBB = Builder.GetInsertBlock();
VBaseAdjustBB = CGF.createBasicBlock("memptr.vadjust");
SkipAdjustBB = CGF.createBasicBlock("memptr.skip_vadjust");
llvm::Value *IsVirtual =
Builder.CreateICmpNE(VBTableOffset, getZeroInt(),
"memptr.is_vbase");
Builder.CreateCondBr(IsVirtual, VBaseAdjustBB, SkipAdjustBB);
CGF.EmitBlock(VBaseAdjustBB);
}
// If we weren't given a dynamic vbptr offset, RD should be complete and we'll
// know the vbptr offset.
if (!VBPtrOffset) {
CharUnits offs = CharUnits::Zero();
if (!RD->hasDefinition()) {
DiagnosticsEngine &Diags = CGF.CGM.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"member pointer representation requires a "
"complete class type for %0 to perform this expression");
Diags.Report(E->getExprLoc(), DiagID) << RD << E->getSourceRange();
} else if (RD->getNumVBases())
offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
VBPtrOffset = llvm::ConstantInt::get(CGM.IntTy, offs.getQuantity());
}
llvm::Value *VBPtr = nullptr;
llvm::Value *VBaseOffs =
GetVBaseOffsetFromVBPtr(CGF, Base, VBPtrOffset, VBTableOffset, &VBPtr);
llvm::Value *AdjustedBase = Builder.CreateInBoundsGEP(VBPtr, VBaseOffs);
// Merge control flow with the case where we didn't have to adjust.
if (VBaseAdjustBB) {
Builder.CreateBr(SkipAdjustBB);
CGF.EmitBlock(SkipAdjustBB);
llvm::PHINode *Phi = Builder.CreatePHI(CGM.Int8PtrTy, 2, "memptr.base");
Phi->addIncoming(Base.getPointer(), OriginalBB);
Phi->addIncoming(AdjustedBase, VBaseAdjustBB);
return Phi;
}
return AdjustedBase;
}
llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MPT->isMemberDataPointer());
unsigned AS = Base.getAddressSpace();
llvm::Type *PType =
CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS);
CGBuilderTy &Builder = CGF.Builder;
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Extract the fields we need, regardless of model. We'll apply them if we
// have them.
llvm::Value *FieldOffset = MemPtr;
llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
llvm::Value *VBPtrOffset = nullptr;
if (MemPtr->getType()->isStructTy()) {
// We need to extract values.
unsigned I = 0;
FieldOffset = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++);
}
llvm::Value *Addr;
if (VirtualBaseAdjustmentOffset) {
Addr = AdjustVirtualBase(CGF, E, RD, Base, VirtualBaseAdjustmentOffset,
VBPtrOffset);
} else {
Addr = Base.getPointer();
}
// Cast to char*.
Addr = Builder.CreateBitCast(Addr, CGF.Int8Ty->getPointerTo(AS));
// Apply the offset, which we assume is non-null.
Addr = Builder.CreateInBoundsGEP(Addr, FieldOffset, "memptr.offset");
// Cast the address to the appropriate pointer type, adopting the address
// space of the base pointer.
return Builder.CreateBitCast(Addr, PType);
}
llvm::Value *
MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
const CastExpr *E,
llvm::Value *Src) {
assert(E->getCastKind() == CK_DerivedToBaseMemberPointer ||
E->getCastKind() == CK_BaseToDerivedMemberPointer ||
E->getCastKind() == CK_ReinterpretMemberPointer);
// Use constant emission if we can.
if (isa<llvm::Constant>(Src))
return EmitMemberPointerConversion(E, cast<llvm::Constant>(Src));
// We may be adding or dropping fields from the member pointer, so we need
// both types and the inheritance models of both records.
const MemberPointerType *SrcTy =
E->getSubExpr()->getType()->castAs<MemberPointerType>();
const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>();
bool IsFunc = SrcTy->isMemberFunctionPointer();
// If the classes use the same null representation, reinterpret_cast is a nop.
bool IsReinterpret = E->getCastKind() == CK_ReinterpretMemberPointer;
if (IsReinterpret && IsFunc)
return Src;
CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl();
CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl();
if (IsReinterpret &&
SrcRD->nullFieldOffsetIsZero() == DstRD->nullFieldOffsetIsZero())
return Src;
CGBuilderTy &Builder = CGF.Builder;
// Branch past the conversion if Src is null.
llvm::Value *IsNotNull = EmitMemberPointerIsNotNull(CGF, Src, SrcTy);
llvm::Constant *DstNull = EmitNullMemberPointer(DstTy);
// C++ 5.2.10p9: The null member pointer value is converted to the null member
// pointer value of the destination type.
if (IsReinterpret) {
// For reinterpret casts, sema ensures that src and dst are both functions
// or data and have the same size, which means the LLVM types should match.
assert(Src->getType() == DstNull->getType());
return Builder.CreateSelect(IsNotNull, Src, DstNull);
}
llvm::BasicBlock *OriginalBB = Builder.GetInsertBlock();
llvm::BasicBlock *ConvertBB = CGF.createBasicBlock("memptr.convert");
llvm::BasicBlock *ContinueBB = CGF.createBasicBlock("memptr.converted");
Builder.CreateCondBr(IsNotNull, ConvertBB, ContinueBB);
CGF.EmitBlock(ConvertBB);
llvm::Value *Dst = EmitNonNullMemberPointerConversion(
SrcTy, DstTy, E->getCastKind(), E->path_begin(), E->path_end(), Src,
Builder);
Builder.CreateBr(ContinueBB);
// In the continuation, choose between DstNull and Dst.
CGF.EmitBlock(ContinueBB);
llvm::PHINode *Phi = Builder.CreatePHI(DstNull->getType(), 2, "memptr.converted");
Phi->addIncoming(DstNull, OriginalBB);
Phi->addIncoming(Dst, ConvertBB);
return Phi;
}
llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy, CastKind CK,
CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Value *Src,
CGBuilderTy &Builder) {
const CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl();
const CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl();
MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel();
MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel();
bool IsFunc = SrcTy->isMemberFunctionPointer();
bool IsConstant = isa<llvm::Constant>(Src);
// Decompose src.
llvm::Value *FirstField = Src;
llvm::Value *NonVirtualBaseAdjustment = getZeroInt();
llvm::Value *VirtualBaseAdjustmentOffset = getZeroInt();
llvm::Value *VBPtrOffset = getZeroInt();
if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
// We need to extract values.
unsigned I = 0;
FirstField = Builder.CreateExtractValue(Src, I++);
if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance))
NonVirtualBaseAdjustment = Builder.CreateExtractValue(Src, I++);
if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance))
VBPtrOffset = Builder.CreateExtractValue(Src, I++);
if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(Src, I++);
}
bool IsDerivedToBase = (CK == CK_DerivedToBaseMemberPointer);
const MemberPointerType *DerivedTy = IsDerivedToBase ? SrcTy : DstTy;
const CXXRecordDecl *DerivedClass = DerivedTy->getMostRecentCXXRecordDecl();
// For data pointers, we adjust the field offset directly. For functions, we
// have a separate field.
llvm::Value *&NVAdjustField = IsFunc ? NonVirtualBaseAdjustment : FirstField;
// The virtual inheritance model has a quirk: the virtual base table is always
// referenced when dereferencing a member pointer even if the member pointer
// is non-virtual. This is accounted for by adjusting the non-virtual offset
// to point backwards to the top of the MDC from the first VBase. Undo this
// adjustment to normalize the member pointer.
llvm::Value *SrcVBIndexEqZero =
Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt());
if (SrcInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) {
if (int64_t SrcOffsetToFirstVBase =
getContext().getOffsetOfBaseWithVBPtr(SrcRD).getQuantity()) {
llvm::Value *UndoSrcAdjustment = Builder.CreateSelect(
SrcVBIndexEqZero,
llvm::ConstantInt::get(CGM.IntTy, SrcOffsetToFirstVBase),
getZeroInt());
NVAdjustField = Builder.CreateNSWAdd(NVAdjustField, UndoSrcAdjustment);
}
}
// A non-zero vbindex implies that we are dealing with a source member in a
// floating virtual base in addition to some non-virtual offset. If the
// vbindex is zero, we are dealing with a source that exists in a non-virtual,
// fixed, base. The difference between these two cases is that the vbindex +
// nvoffset *always* point to the member regardless of what context they are
// evaluated in so long as the vbindex is adjusted. A member inside a fixed
// base requires explicit nv adjustment.
llvm::Constant *BaseClassOffset = llvm::ConstantInt::get(
CGM.IntTy,
CGM.computeNonVirtualBaseClassOffset(DerivedClass, PathBegin, PathEnd)
.getQuantity());
llvm::Value *NVDisp;
if (IsDerivedToBase)
NVDisp = Builder.CreateNSWSub(NVAdjustField, BaseClassOffset, "adj");
else
NVDisp = Builder.CreateNSWAdd(NVAdjustField, BaseClassOffset, "adj");
NVAdjustField = Builder.CreateSelect(SrcVBIndexEqZero, NVDisp, getZeroInt());
// Update the vbindex to an appropriate value in the destination because
// SrcRD's vbtable might not be a strict prefix of the one in DstRD.
llvm::Value *DstVBIndexEqZero = SrcVBIndexEqZero;
if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance) &&
MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) {
if (llvm::GlobalVariable *VDispMap =
getAddrOfVirtualDisplacementMap(SrcRD, DstRD)) {
llvm::Value *VBIndex = Builder.CreateExactUDiv(
VirtualBaseAdjustmentOffset, llvm::ConstantInt::get(CGM.IntTy, 4));
if (IsConstant) {
llvm::Constant *Mapping = VDispMap->getInitializer();
VirtualBaseAdjustmentOffset =
Mapping->getAggregateElement(cast<llvm::Constant>(VBIndex));
} else {
llvm::Value *Idxs[] = {getZeroInt(), VBIndex};
VirtualBaseAdjustmentOffset =
Builder.CreateAlignedLoad(Builder.CreateInBoundsGEP(VDispMap, Idxs),
CharUnits::fromQuantity(4));
}
DstVBIndexEqZero =
Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt());
}
}
// Set the VBPtrOffset to zero if the vbindex is zero. Otherwise, initialize
// it to the offset of the vbptr.
if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) {
llvm::Value *DstVBPtrOffset = llvm::ConstantInt::get(
CGM.IntTy,
getContext().getASTRecordLayout(DstRD).getVBPtrOffset().getQuantity());
VBPtrOffset =
Builder.CreateSelect(DstVBIndexEqZero, getZeroInt(), DstVBPtrOffset);
}
// Likewise, apply a similar adjustment so that dereferencing the member
// pointer correctly accounts for the distance between the start of the first
// virtual base and the top of the MDC.
if (DstInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) {
if (int64_t DstOffsetToFirstVBase =
getContext().getOffsetOfBaseWithVBPtr(DstRD).getQuantity()) {
llvm::Value *DoDstAdjustment = Builder.CreateSelect(
DstVBIndexEqZero,
llvm::ConstantInt::get(CGM.IntTy, DstOffsetToFirstVBase),
getZeroInt());
NVAdjustField = Builder.CreateNSWSub(NVAdjustField, DoDstAdjustment);
}
}
// Recompose dst from the null struct and the adjusted fields from src.
llvm::Value *Dst;
if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) {
Dst = FirstField;
} else {
Dst = llvm::UndefValue::get(ConvertMemberPointerType(DstTy));
unsigned Idx = 0;
Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++);
if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance))
Dst = Builder.CreateInsertValue(Dst, NonVirtualBaseAdjustment, Idx++);
if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance))
Dst = Builder.CreateInsertValue(Dst, VBPtrOffset, Idx++);
if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance))
Dst = Builder.CreateInsertValue(Dst, VirtualBaseAdjustmentOffset, Idx++);
}
return Dst;
}
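// Worked example of the recomposition (hedged): converting
//   int B::*  ->  int D::*   with  struct B { int x; };  struct D : B {};
// both sides use the single-inheritance model, so Dst is just FieldOffset
// plus the non-virtual offset of B within D (zero here); the vbindex-driven
// selects above only do real work when a virtual-inheritance model is
// involved.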
llvm::Constant *
MicrosoftCXXABI::EmitMemberPointerConversion(const CastExpr *E,
llvm::Constant *Src) {
const MemberPointerType *SrcTy =
E->getSubExpr()->getType()->castAs<MemberPointerType>();
const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>();
CastKind CK = E->getCastKind();
return EmitMemberPointerConversion(SrcTy, DstTy, CK, E->path_begin(),
E->path_end(), Src);
}
llvm::Constant *MicrosoftCXXABI::EmitMemberPointerConversion(
const MemberPointerType *SrcTy, const MemberPointerType *DstTy, CastKind CK,
CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd, llvm::Constant *Src) {
assert(CK == CK_DerivedToBaseMemberPointer ||
CK == CK_BaseToDerivedMemberPointer ||
CK == CK_ReinterpretMemberPointer);
// If src is null, emit a new null for dst. We can't return src because dst
// might have a new representation.
if (MemberPointerConstantIsNull(SrcTy, Src))
return EmitNullMemberPointer(DstTy);
// We don't need to do anything for reinterpret_casts of non-null member
// pointers. We should only get here when the two type representations have
// the same size.
if (CK == CK_ReinterpretMemberPointer)
return Src;
CGBuilderTy Builder(CGM, CGM.getLLVMContext());
auto *Dst = cast<llvm::Constant>(EmitNonNullMemberPointerConversion(
SrcTy, DstTy, CK, PathBegin, PathEnd, Src, Builder));
return Dst;
}
CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(
CodeGenFunction &CGF, const Expr *E, Address This,
llvm::Value *&ThisPtrForCall, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MPT->isMemberFunctionPointer());
const FunctionProtoType *FPT =
MPT->getPointeeType()->castAs<FunctionProtoType>();
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
CGBuilderTy &Builder = CGF.Builder;
MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
// Extract the fields we need, regardless of model. We'll apply them if we
// have them.
llvm::Value *FunctionPointer = MemPtr;
llvm::Value *NonVirtualBaseAdjustment = nullptr;
llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
llvm::Value *VBPtrOffset = nullptr;
if (MemPtr->getType()->isStructTy()) {
// We need to extract values.
unsigned I = 0;
FunctionPointer = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasNVOffsetField(MPT, Inheritance))
NonVirtualBaseAdjustment = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance))
VBPtrOffset = Builder.CreateExtractValue(MemPtr, I++);
if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(MemPtr, I++);
}
if (VirtualBaseAdjustmentOffset) {
ThisPtrForCall = AdjustVirtualBase(CGF, E, RD, This,
VirtualBaseAdjustmentOffset, VBPtrOffset);
} else {
ThisPtrForCall = This.getPointer();
}
if (NonVirtualBaseAdjustment) {
// Apply the adjustment and cast back to the original struct type.
llvm::Value *Ptr = Builder.CreateBitCast(ThisPtrForCall, CGF.Int8PtrTy);
Ptr = Builder.CreateInBoundsGEP(Ptr, NonVirtualBaseAdjustment);
ThisPtrForCall = Builder.CreateBitCast(Ptr, ThisPtrForCall->getType(),
"this.adjusted");
}
FunctionPointer =
Builder.CreateBitCast(FunctionPointer, FTy->getPointerTo());
CGCallee Callee(FPT, FunctionPointer);
return Callee;
}
CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) {
return new MicrosoftCXXABI(CGM);
}
// MS RTTI Overview:
// The run time type information emitted by cl.exe contains 5 distinct types of
// structures. Many of them reference each other.
//
// TypeInfo: Static classes that are returned by typeid.
//
// CompleteObjectLocator: Referenced by vftables. They contain information
// required for dynamic casting, including OffsetFromTop. They also contain
// a reference to the TypeInfo for the type and a reference to the
// CompleteHierarchyDescriptor for the type.
//
// ClassHierarchyDescriptor: Contains information about a class hierarchy.
// Used during dynamic_cast to walk a class hierarchy. References a base
// class array and the size of said array.
//
// BaseClassArray: Contains a list of classes in a hierarchy. BaseClassArray is
// somewhat of a misnomer because the most derived class is also in the list,
// as are multiple copies of virtual bases (if they occur multiple times
// in the hierarchy.) The BaseClassArray contains one BaseClassDescriptor for
// every path in the hierarchy, in pre-order depth first order. Note, we do
// not declare a specific llvm type for BaseClassArray, it's merely an array
// of BaseClassDescriptor pointers.
//
// BaseClassDescriptor: Contains information about a class in a class hierarchy.
// BaseClassDescriptor is also somewhat of a misnomer for the same reason that
// BaseClassArray is. It contains information about a class within a
// hierarchy such as: whether this base is ambiguous and what is its offset in the
// vbtable. The names of the BaseClassDescriptors have all of their fields
// mangled into them so they can be aggressively deduplicated by the linker.
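// Illustrative shape (hedged): for
//   struct A { virtual ~A(); };  struct B { virtual ~B(); };
//   struct C : A, B {};
// each of C's two vftables is preceded by a CompleteObjectLocator; both refer
// to C's TypeDescriptor and to one ClassHierarchyDescriptor that records 3
// classes with the branching flag set, whose BaseClassArray lists
// BaseClassDescriptors for C, A, B in pre-order.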
static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
StringRef MangledName("\01??_7type_info@@6B@");
if (auto VTable = CGM.getModule().getNamedGlobal(MangledName))
return VTable;
return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
/*Constant=*/true,
llvm::GlobalVariable::ExternalLinkage,
/*Initializer=*/nullptr, MangledName);
}
namespace {
/// \brief A helper struct that stores information about a class in a class
/// hierarchy. The information stored in these structs is used during the
/// generation of ClassHierarchyDescriptors and BaseClassDescriptors.
// During RTTI creation, MSRTTIClasses are stored in a contiguous array with
// implicit depth first pre-order tree connectivity. getFirstChild and
// getNextChild allow us to walk the tree efficiently.
struct MSRTTIClass {
enum {
IsPrivateOnPath = 1 | 8,
IsAmbiguous = 2,
IsPrivate = 4,
IsVirtual = 16,
HasHierarchyDescriptor = 64
};
MSRTTIClass(const CXXRecordDecl *RD) : RD(RD) {}
uint32_t initialize(const MSRTTIClass *Parent,
const CXXBaseSpecifier *Specifier);
MSRTTIClass *getFirstChild() { return this + 1; }
static MSRTTIClass *getNextChild(MSRTTIClass *Child) {
return Child + 1 + Child->NumBases;
}
const CXXRecordDecl *RD, *VirtualRoot;
uint32_t Flags, NumBases, OffsetInVBase;
};
/// \brief Recursively initialize the base class array.
uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent,
const CXXBaseSpecifier *Specifier) {
Flags = HasHierarchyDescriptor;
if (!Parent) {
VirtualRoot = nullptr;
OffsetInVBase = 0;
} else {
if (Specifier->getAccessSpecifier() != AS_public)
Flags |= IsPrivate | IsPrivateOnPath;
if (Specifier->isVirtual()) {
Flags |= IsVirtual;
VirtualRoot = RD;
OffsetInVBase = 0;
} else {
if (Parent->Flags & IsPrivateOnPath)
Flags |= IsPrivateOnPath;
VirtualRoot = Parent->VirtualRoot;
OffsetInVBase = Parent->OffsetInVBase + RD->getASTContext()
.getASTRecordLayout(Parent->RD).getBaseClassOffset(RD).getQuantity();
}
}
NumBases = 0;
MSRTTIClass *Child = getFirstChild();
for (const CXXBaseSpecifier &Base : RD->bases()) {
NumBases += Child->initialize(this, &Base) + 1;
Child = getNextChild(Child);
}
return NumBases;
}
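// Illustrative serialization (hedged): for
//   struct A {};  struct B : A {};  struct C : A, B {};
// the pre-order array is [C, A, B, A]; after initialize(), C.NumBases == 3
// and B.NumBases == 1, which is what lets getNextChild skip whole subtrees.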
static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) {
switch (Ty->getLinkage()) {
case NoLinkage:
case InternalLinkage:
case UniqueExternalLinkage:
return llvm::GlobalValue::InternalLinkage;
case VisibleNoLinkage:
case ModuleInternalLinkage:
case ModuleLinkage:
case ExternalLinkage:
return llvm::GlobalValue::LinkOnceODRLinkage;
}
llvm_unreachable("Invalid linkage!");
}
/// \brief An ephemeral helper class for building MS RTTI types. It caches some
/// calls to the module and information about the most derived class in a
/// hierarchy.
struct MSRTTIBuilder {
enum {
HasBranchingHierarchy = 1,
HasVirtualBranchingHierarchy = 2,
HasAmbiguousBases = 4
};
MSRTTIBuilder(MicrosoftCXXABI &ABI, const CXXRecordDecl *RD)
: CGM(ABI.CGM), Context(CGM.getContext()),
VMContext(CGM.getLLVMContext()), Module(CGM.getModule()), RD(RD),
Linkage(getLinkageForRTTI(CGM.getContext().getTagDeclType(RD))),
ABI(ABI) {}
llvm::GlobalVariable *getBaseClassDescriptor(const MSRTTIClass &Classes);
llvm::GlobalVariable *
getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes);
llvm::GlobalVariable *getClassHierarchyDescriptor();
llvm::GlobalVariable *getCompleteObjectLocator(const VPtrInfo &Info);
CodeGenModule &CGM;
ASTContext &Context;
llvm::LLVMContext &VMContext;
llvm::Module &Module;
const CXXRecordDecl *RD;
llvm::GlobalVariable::LinkageTypes Linkage;
MicrosoftCXXABI &ABI;
};
} // namespace
/// \brief Recursively serializes a class hierarchy in pre-order depth first
/// order.
static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes,
const CXXRecordDecl *RD) {
Classes.push_back(MSRTTIClass(RD));
for (const CXXBaseSpecifier &Base : RD->bases())
serializeClassHierarchy(Classes, Base.getType()->getAsCXXRecordDecl());
}
/// \brief Find ambiguity among base classes.
static void
detectAmbiguousBases(SmallVectorImpl<MSRTTIClass> &Classes) {
llvm::SmallPtrSet<const CXXRecordDecl *, 8> VirtualBases;
llvm::SmallPtrSet<const CXXRecordDecl *, 8> UniqueBases;
llvm::SmallPtrSet<const CXXRecordDecl *, 8> AmbiguousBases;
for (MSRTTIClass *Class = &Classes.front(); Class <= &Classes.back();) {
if ((Class->Flags & MSRTTIClass::IsVirtual) &&
!VirtualBases.insert(Class->RD).second) {
Class = MSRTTIClass::getNextChild(Class);
continue;
}
if (!UniqueBases.insert(Class->RD).second)
AmbiguousBases.insert(Class->RD);
Class++;
}
if (AmbiguousBases.empty())
return;
for (MSRTTIClass &Class : Classes)
if (AmbiguousBases.count(Class.RD))
Class.Flags |= MSRTTIClass::IsAmbiguous;
}
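// Illustrative ambiguity (hedged): in
//   struct A {};  struct B : A {};  struct C : A {};  struct D : B, C {};
// A is reached along two non-virtual paths, so the second insertion into
// UniqueBases fails and both A entries in the array get IsAmbiguous.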
llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTIClassHierarchyDescriptor(RD, Out);
}
// Check to see if we've already declared this ClassHierarchyDescriptor.
if (auto CHD = Module.getNamedGlobal(MangledName))
return CHD;
// Serialize the class hierarchy and initialize the CHD Fields.
SmallVector<MSRTTIClass, 8> Classes;
serializeClassHierarchy(Classes, RD);
Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr);
detectAmbiguousBases(Classes);
int Flags = 0;
for (auto Class : Classes) {
if (Class.RD->getNumBases() > 1)
Flags |= HasBranchingHierarchy;
// Note: cl.exe does not calculate "HasAmbiguousBases" correctly. We
// believe the field isn't actually used.
if (Class.Flags & MSRTTIClass::IsAmbiguous)
Flags |= HasAmbiguousBases;
}
if ((Flags & HasBranchingHierarchy) && RD->getNumVBases() != 0)
Flags |= HasVirtualBranchingHierarchy;
// These gep indices are used to get the address of the first element of the
// base class array.
llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0),
llvm::ConstantInt::get(CGM.IntTy, 0)};
// Forward-declare the class hierarchy descriptor
auto Type = ABI.getClassHierarchyDescriptorType();
auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
/*Initializer=*/nullptr,
MangledName);
if (CHD->isWeakForLinker())
CHD->setComdat(CGM.getModule().getOrInsertComdat(CHD->getName()));
auto *Bases = getBaseClassArray(Classes);
// Initialize the base class ClassHierarchyDescriptor.
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, 0), // reserved by the runtime
llvm::ConstantInt::get(CGM.IntTy, Flags),
llvm::ConstantInt::get(CGM.IntTy, Classes.size()),
ABI.getImageRelativeConstant(llvm::ConstantExpr::getInBoundsGetElementPtr(
Bases->getValueType(), Bases,
llvm::ArrayRef<llvm::Value *>(GEPIndices))),
};
CHD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
return CHD;
}
llvm::GlobalVariable *
MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTIBaseClassArray(RD, Out);
}
// Forward-declare the base class array.
// cl.exe pads the base class array with 1 (in 32-bit mode) or 4 (in 64-bit
// mode) bytes of padding. We provide a pointer-sized amount of padding by
// adding +1 to Classes.size(). The sections have pointer alignment and are
// marked pick-any so it shouldn't matter.
llvm::Type *PtrType = ABI.getImageRelativeType(
ABI.getBaseClassDescriptorType()->getPointerTo());
auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1);
auto *BCA =
new llvm::GlobalVariable(Module, ArrType,
/*Constant=*/true, Linkage,
/*Initializer=*/nullptr, MangledName);
if (BCA->isWeakForLinker())
BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName()));
// Initialize the BaseClassArray.
SmallVector<llvm::Constant *, 8> BaseClassArrayData;
for (MSRTTIClass &Class : Classes)
BaseClassArrayData.push_back(
ABI.getImageRelativeConstant(getBaseClassDescriptor(Class)));
BaseClassArrayData.push_back(llvm::Constant::getNullValue(PtrType));
BCA->setInitializer(llvm::ConstantArray::get(ArrType, BaseClassArrayData));
return BCA;
}
llvm::GlobalVariable *
MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) {
// Compute the fields for the BaseClassDescriptor. They are computed up front
// because they are mangled into the name of the object.
uint32_t OffsetInVBTable = 0;
int32_t VBPtrOffset = -1;
if (Class.VirtualRoot) {
auto &VTableContext = CGM.getMicrosoftVTableContext();
OffsetInVBTable = VTableContext.getVBTableIndex(RD, Class.VirtualRoot) * 4;
VBPtrOffset = Context.getASTRecordLayout(RD).getVBPtrOffset().getQuantity();
}
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTIBaseClassDescriptor(
Class.RD, Class.OffsetInVBase, VBPtrOffset, OffsetInVBTable,
Class.Flags, Out);
}
// Check to see if we've already declared this object.
if (auto BCD = Module.getNamedGlobal(MangledName))
return BCD;
// Forward-declare the base class descriptor.
auto Type = ABI.getBaseClassDescriptorType();
auto BCD =
new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
/*Initializer=*/nullptr, MangledName);
if (BCD->isWeakForLinker())
BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName()));
// Initialize the BaseClassDescriptor.
llvm::Constant *Fields[] = {
ABI.getImageRelativeConstant(
ABI.getAddrOfRTTIDescriptor(Context.getTypeDeclType(Class.RD))),
llvm::ConstantInt::get(CGM.IntTy, Class.NumBases),
llvm::ConstantInt::get(CGM.IntTy, Class.OffsetInVBase),
llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset),
llvm::ConstantInt::get(CGM.IntTy, OffsetInVBTable),
llvm::ConstantInt::get(CGM.IntTy, Class.Flags),
ABI.getImageRelativeConstant(
MSRTTIBuilder(ABI, Class.RD).getClassHierarchyDescriptor()),
};
BCD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
return BCD;
}
llvm::GlobalVariable *
MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo &Info) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
ABI.getMangleContext().mangleCXXRTTICompleteObjectLocator(RD, Info.MangledPath, Out);
}
// Check to see if we've already computed this complete object locator.
if (auto COL = Module.getNamedGlobal(MangledName))
return COL;
// Compute the fields of the complete object locator.
int OffsetToTop = Info.FullOffsetInMDC.getQuantity();
int VFPtrOffset = 0;
// The offset includes the vtordisp if one exists.
if (const CXXRecordDecl *VBase = Info.getVBaseWithVPtr())
if (Context.getASTRecordLayout(RD)
.getVBaseOffsetsMap()
.find(VBase)
->second.hasVtorDisp())
VFPtrOffset = Info.NonVirtualOffset.getQuantity() + 4;
// Forward-declare the complete object locator.
llvm::StructType *Type = ABI.getCompleteObjectLocatorType();
auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
/*Initializer=*/nullptr, MangledName);
// Initialize the CompleteObjectLocator.
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, ABI.isImageRelative()),
llvm::ConstantInt::get(CGM.IntTy, OffsetToTop),
llvm::ConstantInt::get(CGM.IntTy, VFPtrOffset),
ABI.getImageRelativeConstant(
CGM.GetAddrOfRTTIDescriptor(Context.getTypeDeclType(RD))),
ABI.getImageRelativeConstant(getClassHierarchyDescriptor()),
ABI.getImageRelativeConstant(COL),
};
llvm::ArrayRef<llvm::Constant *> FieldsRef(Fields);
if (!ABI.isImageRelative())
FieldsRef = FieldsRef.drop_back();
COL->setInitializer(llvm::ConstantStruct::get(Type, FieldsRef));
if (COL->isWeakForLinker())
COL->setComdat(CGM.getModule().getOrInsertComdat(COL->getName()));
return COL;
}
static QualType decomposeTypeForEH(ASTContext &Context, QualType T,
bool &IsConst, bool &IsVolatile,
bool &IsUnaligned) {
T = Context.getExceptionObjectType(T);
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if [...]
// - the handler is of type cv T or const T& where T is a pointer type and
// E is a pointer type that can be converted to T by [...]
// - a qualification conversion
IsConst = false;
IsVolatile = false;
IsUnaligned = false;
QualType PointeeType = T->getPointeeType();
if (!PointeeType.isNull()) {
IsConst = PointeeType.isConstQualified();
IsVolatile = PointeeType.isVolatileQualified();
IsUnaligned = PointeeType.getQualifiers().hasUnaligned();
}
// Member pointer types like "const int A::*" are represented by having RTTI
// for "int A::*" and separately storing the const qualifier.
if (const auto *MPTy = T->getAs<MemberPointerType>())
T = Context.getMemberPointerType(PointeeType.getUnqualifiedType(),
MPTy->getClass());
// Pointer types like "const int * const *" are represented by having RTTI
// for "const int **" and separately storing the const qualifier.
if (T->isPointerType())
T = Context.getPointerType(PointeeType.getUnqualifiedType());
return T;
}
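// Worked example of the decomposition (hedged): a handler of type
//   const int *const *
// is decomposed into the key type `const int **` with IsConst == true, so one
// TypeDescriptor serves all cv-qualified variants of the handler type.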
CatchTypeInfo
MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
QualType CatchHandlerType) {
// TypeDescriptors for exceptions never have qualified pointer types,
// qualifiers are stored separately in order to support qualification
// conversions.
bool IsConst, IsVolatile, IsUnaligned;
Type =
decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile, IsUnaligned);
bool IsReference = CatchHandlerType->isReferenceType();
uint32_t Flags = 0;
if (IsConst)
Flags |= 1;
if (IsVolatile)
Flags |= 2;
if (IsUnaligned)
Flags |= 4;
if (IsReference)
Flags |= 8;
return CatchTypeInfo{getAddrOfRTTIDescriptor(Type)->stripPointerCasts(),
Flags};
}
/// \brief Gets a TypeDescriptor. Returns an llvm::Constant * rather than an
/// llvm::GlobalVariable * because different type descriptors have different
/// types, and need to be abstracted. They are abstracted by casting the
/// address to an Int8PtrTy.
llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXRTTI(Type, Out);
}
// Check to see if we've already declared this TypeDescriptor.
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
// Note for the future: If we would ever like to do deferred emission of
// RTTI, check whether emitting vtables opportunistically needs any adjustment.
// Compute the fields for the TypeDescriptor.
SmallString<256> TypeInfoString;
{
llvm::raw_svector_ostream Out(TypeInfoString);
getMangleContext().mangleCXXRTTIName(Type, Out);
}
// Declare and initialize the TypeDescriptor.
llvm::Constant *Fields[] = {
getTypeInfoVTable(CGM), // VFPtr
llvm::ConstantPointerNull::get(CGM.Int8PtrTy), // Runtime data
llvm::ConstantDataArray::getString(CGM.getLLVMContext(), TypeInfoString)};
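// The layout matches the CRT's TypeDescriptor (a sketch; field names follow
// the public MSVC headers):
//   { const void *pVFTable; void *spare; char name[]; }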
llvm::StructType *TypeDescriptorType =
getTypeDescriptorType(TypeInfoString);
auto *Var = new llvm::GlobalVariable(
CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
getLinkageForRTTI(Type),
llvm::ConstantStruct::get(TypeDescriptorType, Fields),
MangledName);
if (Var->isWeakForLinker())
Var->setComdat(CGM.getModule().getOrInsertComdat(Var->getName()));
return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy);
}
/// \brief Gets or creates a Microsoft CompleteObjectLocator.
llvm::GlobalVariable *
MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD,
const VPtrInfo &Info) {
return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info);
}
static void emitCXXConstructor(CodeGenModule &CGM,
const CXXConstructorDecl *ctor,
StructorType ctorType) {
// There are no constructor variants; always emit the complete constructor.
llvm::Function *Fn = CGM.codegenCXXStructor(ctor, StructorType::Complete);
CGM.maybeSetTrivialComdat(*ctor, *Fn);
}
static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor,
StructorType dtorType) {
// The complete destructor is equivalent to the base destructor for
// classes with no virtual bases, so try to emit it as an alias.
if (!dtor->getParent()->getNumVBases() &&
(dtorType == StructorType::Complete || dtorType == StructorType::Base)) {
bool ProducedAlias = !CGM.TryEmitDefinitionAsAlias(
GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base), true);
if (ProducedAlias) {
if (dtorType == StructorType::Complete)
return;
if (dtor->isVirtual())
CGM.getVTables().EmitThunks(GlobalDecl(dtor, Dtor_Complete));
}
}
// The base destructor is equivalent to the base destructor of its
// base class if there is exactly one non-virtual base class with a
// non-trivial destructor, there are no fields with a non-trivial
// destructor, and the body of the destructor is trivial.
if (dtorType == StructorType::Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
return;
llvm::Function *Fn = CGM.codegenCXXStructor(dtor, dtorType);
if (Fn->isWeakForLinker())
Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
}
void MicrosoftCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
StructorType Type) {
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
emitCXXConstructor(CGM, CD, Type);
return;
}
emitCXXDestructor(CGM, cast<CXXDestructorDecl>(MD), Type);
}
llvm::Function *
MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT) {
assert(CT == Ctor_CopyingClosure || CT == Ctor_DefaultClosure);
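// A copying closure wraps a copy constructor whose signature is not the plain
// (this, src) form (e.g. it takes extra defaulted parameters or uses a
// non-default calling convention), so the EH runtime can invoke it uniformly;
// a default closure plays the same role for a defaulted constructor.
// (Illustrative summary.)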
// Calculate the mangled name.
SmallString<256> ThunkName;
llvm::raw_svector_ostream Out(ThunkName);
getMangleContext().mangleCXXCtor(CD, CT, Out);
// If the thunk has been generated previously, just return it.
if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
return cast<llvm::Function>(GV);
// Create the llvm::Function.
const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSCtorClosure(CD, CT);
llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo);
const CXXRecordDecl *RD = CD->getParent();
QualType RecordTy = getContext().getRecordType(RD);
llvm::Function *ThunkFn = llvm::Function::Create(
ThunkTy, getLinkageForRTTI(RecordTy), ThunkName.str(), &CGM.getModule());
ThunkFn->setCallingConv(static_cast<llvm::CallingConv::ID>(
FnInfo.getEffectiveCallingConvention()));
if (ThunkFn->isWeakForLinker())
ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
bool IsCopy = CT == Ctor_CopyingClosure;
// Start codegen.
CodeGenFunction CGF(CGM);
CGF.CurGD = GlobalDecl(CD, Ctor_Complete);
// Build FunctionArgs.
FunctionArgList FunctionArgs;
// A constructor always starts with a 'this' pointer as its first argument.
buildThisParam(CGF, FunctionArgs);
// Following the 'this' pointer is a reference to the source object that we
// are copying from.
ImplicitParamDecl SrcParam(
getContext(), /*DC=*/nullptr, SourceLocation(),
&getContext().Idents.get("src"),
getContext().getLValueReferenceType(RecordTy,
/*SpelledAsLValue=*/true),
ImplicitParamDecl::Other);
if (IsCopy)
FunctionArgs.push_back(&SrcParam);
// Constructors for classes which utilize virtual bases have an additional
// parameter which indicates whether or not it is being delegated to by a more
// derived constructor.
ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr,
SourceLocation(),
&getContext().Idents.get("is_most_derived"),
getContext().IntTy, ImplicitParamDecl::Other);
// Only add the parameter to the list if the class has virtual bases.
if (RD->getNumVBases() > 0)
FunctionArgs.push_back(&IsMostDerived);
// Start defining the function.
auto NL = ApplyDebugLocation::CreateEmpty(CGF);
CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
FunctionArgs, CD->getLocation(), SourceLocation());
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(CGF);
EmitThisParam(CGF);
llvm::Value *This = getThisValue(CGF);
llvm::Value *SrcVal =
IsCopy ? CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&SrcParam), "src")
: nullptr;
CallArgList Args;
// Push the this ptr.
Args.add(RValue::get(This), CD->getThisType(getContext()));
// Push the src ptr.
if (SrcVal)
Args.add(RValue::get(SrcVal), SrcParam.getType());
// Add the rest of the default arguments.
SmallVector<const Stmt *, 4> ArgVec;
ArrayRef<ParmVarDecl *> params = CD->parameters().drop_front(IsCopy ? 1 : 0);
for (const ParmVarDecl *PD : params) {
assert(PD->hasDefaultArg() && "ctor closure lacks default args");
ArgVec.push_back(PD->getDefaultArg());
}
CodeGenFunction::RunCleanupsScope Cleanups(CGF);
const auto *FPT = CD->getType()->castAs<FunctionProtoType>();
CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0);
// Insert any ABI-specific implicit constructor arguments.
AddedStructorArgs ExtraArgs =
addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
/*ForVirtualBase=*/false,
/*Delegating=*/false, Args);
// Call the constructor with our arguments.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
CGCallee Callee = CGCallee::forDirect(CalleePtr, CD);
const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix);
CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args);
Cleanups.ForceCleanup();
// Emit the ret instruction and remove any temporary instructions created to
// aid CodeGen.
CGF.FinishFunction(SourceLocation());
return ThunkFn;
}
llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
uint32_t NVOffset,
int32_t VBPtrOffset,
uint32_t VBIndex) {
assert(!T->isReferenceType());
CXXRecordDecl *RD = T->getAsCXXRecordDecl();
const CXXConstructorDecl *CD =
RD ? CGM.getContext().getCopyConstructorForExceptionObject(RD) : nullptr;
CXXCtorType CT = Ctor_Complete;
if (CD)
if (!hasDefaultCXXMethodCC(getContext(), CD) || CD->getNumParams() != 1)
CT = Ctor_CopyingClosure;
uint32_t Size = getContext().getTypeSizeInChars(T).getQuantity();
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXCatchableType(T, CD, CT, Size, NVOffset,
VBPtrOffset, VBIndex, Out);
}
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return getImageRelativeConstant(GV);
// The TypeDescriptor is used by the runtime to determine if a catch handler
// is appropriate for the exception object.
llvm::Constant *TD = getImageRelativeConstant(getAddrOfRTTIDescriptor(T));
// The runtime is responsible for calling the copy constructor if the
// exception is caught by value.
llvm::Constant *CopyCtor;
if (CD) {
if (CT == Ctor_CopyingClosure)
CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure);
else
CopyCtor = CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy);
} else {
CopyCtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
}
CopyCtor = getImageRelativeConstant(CopyCtor);
bool IsScalar = !RD;
bool HasVirtualBases = false;
bool IsStdBadAlloc = false; // std::bad_alloc is special for some reason.
QualType PointeeType = T;
if (T->isPointerType())
PointeeType = T->getPointeeType();
if (const CXXRecordDecl *RD = PointeeType->getAsCXXRecordDecl()) {
HasVirtualBases = RD->getNumVBases() > 0;
if (IdentifierInfo *II = RD->getIdentifier())
IsStdBadAlloc = II->isStr("bad_alloc") && RD->isInStdNamespace();
}
// Encode the relevant CatchableType properties into the Flags bitfield.
// FIXME: Figure out how bits 2 or 8 can get set.
uint32_t Flags = 0;
if (IsScalar)
Flags |= 1;
if (HasVirtualBases)
Flags |= 4;
if (IsStdBadAlloc)
Flags |= 16;
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
TD, // TypeDescriptor
llvm::ConstantInt::get(CGM.IntTy, NVOffset), // NonVirtualAdjustment
llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset), // OffsetToVBPtr
llvm::ConstantInt::get(CGM.IntTy, VBIndex), // VBTableIndex
llvm::ConstantInt::get(CGM.IntTy, Size), // Size
CopyCtor // CopyCtor
};
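// The field order mirrors the CRT's _s_CatchableType from ehdata.h (a sketch;
// names follow the public MSVC headers):
//   { properties, pType, thisDisplacement{mdisp, pdisp, vdisp},
//     sizeOrOffset, copyFunction }
// with NVOffset/VBPtrOffset/VBIndex filling the PMD displacement fields.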
llvm::StructType *CTType = getCatchableTypeType();
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(CTType, Fields), MangledName);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
GV->setSection(".xdata");
if (GV->isWeakForLinker())
GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
return getImageRelativeConstant(GV);
}
llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
assert(!T->isReferenceType());
// See if we've already generated a CatchableTypeArray for this type before.
llvm::GlobalVariable *&CTA = CatchableTypeArrays[T];
if (CTA)
return CTA;
// Ensure that we don't have duplicate entries in our CatchableTypeArray by
// using a SmallSetVector. Duplicates may arise due to virtual bases
// occurring more than once in the hierarchy.
llvm::SmallSetVector<llvm::Constant *, 2> CatchableTypes;
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if [...]
// - the handler is of type cv T or cv T& and T is an unambiguous public
// base class of E, or
// - the handler is of type cv T or const T& where T is a pointer type and
// E is a pointer type that can be converted to T by [...]
// - a standard pointer conversion (4.10) not involving conversions to
// pointers to private or protected or ambiguous classes
const CXXRecordDecl *MostDerivedClass = nullptr;
bool IsPointer = T->isPointerType();
if (IsPointer)
MostDerivedClass = T->getPointeeType()->getAsCXXRecordDecl();
else
MostDerivedClass = T->getAsCXXRecordDecl();
// Collect all the unambiguous public bases of the MostDerivedClass.
if (MostDerivedClass) {
const ASTContext &Context = getContext();
const ASTRecordLayout &MostDerivedLayout =
Context.getASTRecordLayout(MostDerivedClass);
MicrosoftVTableContext &VTableContext = CGM.getMicrosoftVTableContext();
SmallVector<MSRTTIClass, 8> Classes;
serializeClassHierarchy(Classes, MostDerivedClass);
Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr);
detectAmbiguousBases(Classes);
for (const MSRTTIClass &Class : Classes) {
// Skip any ambiguous or private bases.
if (Class.Flags &
(MSRTTIClass::IsPrivateOnPath | MSRTTIClass::IsAmbiguous))
continue;
// Write down how to convert from a derived pointer to a base pointer.
uint32_t OffsetInVBTable = 0;
int32_t VBPtrOffset = -1;
if (Class.VirtualRoot) {
OffsetInVBTable =
VTableContext.getVBTableIndex(MostDerivedClass, Class.VirtualRoot)*4;
VBPtrOffset = MostDerivedLayout.getVBPtrOffset().getQuantity();
}
// Turn our record back into a pointer if the exception object is a
// pointer.
QualType RTTITy = QualType(Class.RD->getTypeForDecl(), 0);
if (IsPointer)
RTTITy = Context.getPointerType(RTTITy);
CatchableTypes.insert(getCatchableType(RTTITy, Class.OffsetInVBase,
VBPtrOffset, OffsetInVBTable));
}
}
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if
// - The handler is of type cv T or cv T& and E and T are the same type
// (ignoring the top-level cv-qualifiers)
CatchableTypes.insert(getCatchableType(T));
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if
// - the handler is of type cv T or const T& where T is a pointer type and
// E is a pointer type that can be converted to T by [...]
// - a standard pointer conversion (4.10) not involving conversions to
// pointers to private or protected or ambiguous classes
//
// C++14 [conv.ptr]p2:
// A prvalue of type "pointer to cv T," where T is an object type, can be
// converted to a prvalue of type "pointer to cv void".
if (IsPointer && T->getPointeeType()->isObjectType())
CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
// C++14 [except.handle]p3:
// A handler is a match for an exception object of type E if [...]
// - the handler is of type cv T or const T& where T is a pointer or
// pointer to member type and E is std::nullptr_t.
//
// We cannot possibly list all possible pointer types here, making this
// implementation incompatible with the standard. However, MSVC includes an
// entry for pointer-to-void in this case. Let's do the same.
if (T->isNullPtrType())
CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
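// To illustrate: for "throw d" where Derived publicly derives from Base, the
// array holds CatchableTypes for Derived and Base; for "throw &d" it instead
// holds Derived*, Base*, and void*. (Illustrative example.)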
uint32_t NumEntries = CatchableTypes.size();
llvm::Type *CTType =
getImageRelativeType(getCatchableTypeType()->getPointerTo());
llvm::ArrayType *AT = llvm::ArrayType::get(CTType, NumEntries);
llvm::StructType *CTAType = getCatchableTypeArrayType(NumEntries);
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, NumEntries), // NumEntries
llvm::ConstantArray::get(
AT, llvm::makeArrayRef(CatchableTypes.begin(),
CatchableTypes.end())) // CatchableTypes
};
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXCatchableTypeArray(T, NumEntries, Out);
}
CTA = new llvm::GlobalVariable(
CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(CTAType, Fields), MangledName);
CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CTA->setSection(".xdata");
if (CTA->isWeakForLinker())
CTA->setComdat(CGM.getModule().getOrInsertComdat(CTA->getName()));
return CTA;
}
llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
bool IsConst, IsVolatile, IsUnaligned;
T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile, IsUnaligned);
// The CatchableTypeArray enumerates the various (CV-unqualified) types that
// the exception object may be caught as.
llvm::GlobalVariable *CTA = getCatchableTypeArray(T);
// The first field in a CatchableTypeArray is the number of CatchableTypes.
// This is used as a component of the mangled name, which means that we need to
// know what it is in order to see if we have previously generated the
// ThrowInfo.
uint32_t NumEntries =
cast<llvm::ConstantInt>(CTA->getInitializer()->getAggregateElement(0U))
->getLimitedValue();
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, IsUnaligned,
NumEntries, Out);
}
// Reuse a previously generated ThrowInfo if we have generated an appropriate
// one before.
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
return GV;
// The RTTI TypeDescriptor uses an unqualified type but catch clauses must
// be at least as CV qualified. Encode this requirement into the Flags
// bitfield.
uint32_t Flags = 0;
if (IsConst)
Flags |= 1;
if (IsVolatile)
Flags |= 2;
if (IsUnaligned)
Flags |= 4;
// The cleanup-function (a destructor) must be called when the exception
// object's lifetime ends.
llvm::Constant *CleanupFn = llvm::Constant::getNullValue(CGM.Int8PtrTy);
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
if (CXXDestructorDecl *DtorD = RD->getDestructor())
if (!DtorD->isTrivial())
CleanupFn = llvm::ConstantExpr::getBitCast(
CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete),
CGM.Int8PtrTy);
// This is unused as far as we can tell; initialize it to null.
llvm::Constant *ForwardCompat =
getImageRelativeConstant(llvm::Constant::getNullValue(CGM.Int8PtrTy));
llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant(
llvm::ConstantExpr::getBitCast(CTA, CGM.Int8PtrTy));
llvm::StructType *TIType = getThrowInfoType();
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
getImageRelativeConstant(CleanupFn), // CleanupFn
ForwardCompat, // ForwardCompat
PointerToCatchableTypes // CatchableTypeArray
};
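// The field order mirrors the CRT's _s_ThrowInfo from ehdata.h (a sketch;
// names follow the public MSVC headers):
//   { attributes, pmfnUnwind, pForwardCompat, pCatchableTypeArray }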
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName));
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
GV->setSection(".xdata");
if (GV->isWeakForLinker())
GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
return GV;
}
void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
const Expr *SubExpr = E->getSubExpr();
QualType ThrowType = SubExpr->getType();
// The exception object lives on the stack and its address is passed to the
// runtime function.
Address AI = CGF.CreateMemTemp(ThrowType);
CGF.EmitAnyExprToMem(SubExpr, AI, ThrowType.getQualifiers(),
/*IsInit=*/true);
// The so-called ThrowInfo is used to describe how the exception object may be
// caught.
llvm::GlobalVariable *TI = getThrowInfo(ThrowType);
// Call into the runtime to throw the exception.
llvm::Value *Args[] = {
CGF.Builder.CreateBitCast(AI.getPointer(), CGM.Int8PtrTy),
TI
};
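// The emitted call is roughly (an illustrative sketch; exact IR names vary):
//   call void @_CxxThrowException(i8* %exn.mem, %eh.ThrowInfo* @"_TI...")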
CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args);
}
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/Darwin.cpp (revision 322855)
@@ -1,2028 +1,2033 @@
//===--- Darwin.cpp - Darwin Tool and ToolChain Implementations -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "Darwin.h"
#include "Arch/ARM.h"
#include "CommonArgs.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetParser.h"
#include <cstdlib> // ::getenv
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
// See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
// archs which Darwin doesn't use.
// The matching this routine does is fairly pointless, since it is neither the
// complete architecture list, nor a reasonable subset. The problem is that
// historically the driver driver accepts this and also ties its -march=
// handling to the architecture name, so we need to be careful before removing
// support for it.
// This code must be kept in sync with Clang's Darwin specific argument
// translation.
return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
.Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
.Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
.Case("ppc64", llvm::Triple::ppc64)
.Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
.Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
llvm::Triple::x86)
.Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
// This is derived from the driver driver.
.Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
.Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
.Cases("armv7s", "xscale", llvm::Triple::arm)
.Case("arm64", llvm::Triple::aarch64)
.Case("r600", llvm::Triple::r600)
.Case("amdgcn", llvm::Triple::amdgcn)
.Case("nvptx", llvm::Triple::nvptx)
.Case("nvptx64", llvm::Triple::nvptx64)
.Case("amdil", llvm::Triple::amdil)
.Case("spir", llvm::Triple::spir)
.Default(llvm::Triple::UnknownArch);
}
void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
unsigned ArchKind = llvm::ARM::parseArch(Str);
T.setArch(Arch);
if (Str == "x86_64h")
T.setArchName(Str);
else if (ArchKind == llvm::ARM::AK_ARMV6M ||
ArchKind == llvm::ARM::AK_ARMV7M ||
ArchKind == llvm::ARM::AK_ARMV7EM) {
T.setOS(llvm::Triple::UnknownOS);
T.setObjectFormat(llvm::Triple::MachO);
}
}
void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
// If -fno-integrated-as is used, add -Q to the darwin assembler driver to make
// sure it runs its system assembler, not clang's integrated assembler.
// Applicable to darwin11+ and Xcode 4+. darwin<10 lacked integrated-as.
// FIXME: at run-time detect assembler capabilities or rely on version
// information forwarded by -target-assembler-version.
if (Args.hasArg(options::OPT_fno_integrated_as)) {
const llvm::Triple &T(getToolChain().getTriple());
if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7)))
CmdArgs.push_back("-Q");
}
// Forward -g, assuming we are dealing with an actual assembly file.
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
if (Args.hasArg(options::OPT_gstabs))
CmdArgs.push_back("--gstabs");
else if (Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-g");
}
// Derived from asm spec.
AddMachOArch(Args, CmdArgs);
// Use -force_cpusubtype_ALL on x86 by default.
if (getToolChain().getArch() == llvm::Triple::x86 ||
getToolChain().getArch() == llvm::Triple::x86_64 ||
Args.hasArg(options::OPT_force__cpusubtype__ALL))
CmdArgs.push_back("-force_cpusubtype_ALL");
if (getToolChain().getArch() != llvm::Triple::x86_64 &&
(((Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext)) &&
getMachOToolChain().isKernelStatic()) ||
Args.hasArg(options::OPT_static)))
CmdArgs.push_back("-static");
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
// asm_final spec is empty.
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void darwin::MachOTool::anchor() {}
void darwin::MachOTool::AddMachOArch(const ArgList &Args,
ArgStringList &CmdArgs) const {
StringRef ArchName = getMachOToolChain().getMachOArchName(Args);
// Derived from darwin_arch spec.
CmdArgs.push_back("-arch");
CmdArgs.push_back(Args.MakeArgString(ArchName));
// FIXME: Is this needed anymore?
if (ArchName == "arm")
CmdArgs.push_back("-force_cpusubtype_ALL");
}
bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const {
// We only need to generate a temp path for LTO if we aren't compiling object
// files. When compiling source files, we run 'dsymutil' after linking. We
// don't run 'dsymutil' when compiling object files.
for (const auto &Input : Inputs)
if (Input.getType() != types::TY_Object)
return true;
return false;
}
/// \brief Pass -no_deduplicate to ld64 under certain conditions:
///
/// - Either -O0 or -O1 is explicitly specified
/// - No -O option is specified *and* this is a compile+link (implicit -O0)
///
/// Also do *not* add -no_deduplicate when no -O option is specified and this
/// is just a link (we can't imply -O0)
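///
/// For example (illustrative, with ld64 >= 262): "clang -O0 main.c" and
/// "clang main.c" both end up passing -no_deduplicate, while "clang main.o"
/// (link-only, no -O given) and "clang -O2 main.c" do not.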
static bool shouldLinkerNotDedup(bool IsLinkerOnlyAction, const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O0))
return true;
if (A->getOption().matches(options::OPT_O))
return llvm::StringSwitch<bool>(A->getValue())
.Case("1", true)
.Default(false);
return false; // OPT_Ofast & OPT_O4
}
if (!IsLinkerOnlyAction) // Implicit -O0 for compile+link only.
return true;
return false;
}
void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfoList &Inputs) const {
const Driver &D = getToolChain().getDriver();
const toolchains::MachO &MachOTC = getMachOToolChain();
unsigned Version[5] = {0, 0, 0, 0, 0};
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
if (!Driver::GetReleaseVersion(A->getValue(), Version))
D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args);
}
// Newer linkers support -demangle. Pass it if supported and not disabled by
// the user.
if (Version[0] >= 100 && !Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
CmdArgs.push_back("-demangle");
if (Args.hasArg(options::OPT_rdynamic) && Version[0] >= 137)
CmdArgs.push_back("-export_dynamic");
// If we are using App Extension restrictions, pass a flag to the linker
// telling it that the compiled code has been audited.
if (Args.hasFlag(options::OPT_fapplication_extension,
options::OPT_fno_application_extension, false))
CmdArgs.push_back("-application_extension");
if (D.isUsingLTO()) {
// If we are using LTO, then automatically create a temporary file path for
// the linker to use, so that its lifetime will extend past a possible
// dsymutil step.
if (Version[0] >= 116 && NeedsTempPath(Inputs)) {
const char *TmpPath = C.getArgs().MakeArgString(
D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
C.addTempFile(TmpPath);
CmdArgs.push_back("-object_path_lto");
CmdArgs.push_back(TmpPath);
}
}
// Use the -lto_library option to specify the libLTO.dylib path. Try to find
// it in clang installed libraries. ld64 will only look at this argument
// when it actually uses LTO, so libLTO.dylib only needs to exist at link
// time if ld64 decides that it needs to use LTO.
// Since this is passed unconditionally, ld64 will never look for libLTO.dylib
// next to it. That's ok since ld64 using a libLTO.dylib not matching the
// clang version won't work anyway.
if (Version[0] >= 133) {
// Search for libLTO in <InstalledDir>/../lib/libLTO.dylib
StringRef P = llvm::sys::path::parent_path(D.Dir);
SmallString<128> LibLTOPath(P);
llvm::sys::path::append(LibLTOPath, "lib");
llvm::sys::path::append(LibLTOPath, "libLTO.dylib");
CmdArgs.push_back("-lto_library");
CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath));
}
// ld64 version 262 and above run the deduplicate pass by default.
if (Version[0] >= 262 && shouldLinkerNotDedup(C.getJobs().empty(), Args))
CmdArgs.push_back("-no_deduplicate");
// Derived from the "link" spec.
Args.AddAllArgs(CmdArgs, options::OPT_static);
if (!Args.hasArg(options::OPT_static))
CmdArgs.push_back("-dynamic");
if (Args.hasArg(options::OPT_fgnu_runtime)) {
// FIXME: gcc replaces -lobjc in forward args with -lobjc-gnu
// here. How do we wish to handle such things?
}
if (!Args.hasArg(options::OPT_dynamiclib)) {
AddMachOArch(Args, CmdArgs);
// FIXME: Why do this only on this path?
Args.AddLastArg(CmdArgs, options::OPT_force__cpusubtype__ALL);
Args.AddLastArg(CmdArgs, options::OPT_bundle);
Args.AddAllArgs(CmdArgs, options::OPT_bundle__loader);
Args.AddAllArgs(CmdArgs, options::OPT_client__name);
Arg *A;
if ((A = Args.getLastArg(options::OPT_compatibility__version)) ||
(A = Args.getLastArg(options::OPT_current__version)) ||
(A = Args.getLastArg(options::OPT_install__name)))
D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
<< "-dynamiclib";
Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace);
Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs);
Args.AddLastArg(CmdArgs, options::OPT_private__bundle);
} else {
CmdArgs.push_back("-dylib");
Arg *A;
if ((A = Args.getLastArg(options::OPT_bundle)) ||
(A = Args.getLastArg(options::OPT_bundle__loader)) ||
(A = Args.getLastArg(options::OPT_client__name)) ||
(A = Args.getLastArg(options::OPT_force__flat__namespace)) ||
(A = Args.getLastArg(options::OPT_keep__private__externs)) ||
(A = Args.getLastArg(options::OPT_private__bundle)))
D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
<< "-dynamiclib";
Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version,
"-dylib_compatibility_version");
Args.AddAllArgsTranslated(CmdArgs, options::OPT_current__version,
"-dylib_current_version");
AddMachOArch(Args, CmdArgs);
Args.AddAllArgsTranslated(CmdArgs, options::OPT_install__name,
"-dylib_install_name");
}
Args.AddLastArg(CmdArgs, options::OPT_all__load);
Args.AddAllArgs(CmdArgs, options::OPT_allowable__client);
Args.AddLastArg(CmdArgs, options::OPT_bind__at__load);
if (MachOTC.isTargetIOSBased())
Args.AddLastArg(CmdArgs, options::OPT_arch__errors__fatal);
Args.AddLastArg(CmdArgs, options::OPT_dead__strip);
Args.AddLastArg(CmdArgs, options::OPT_no__dead__strip__inits__and__terms);
Args.AddAllArgs(CmdArgs, options::OPT_dylib__file);
Args.AddLastArg(CmdArgs, options::OPT_dynamic);
Args.AddAllArgs(CmdArgs, options::OPT_exported__symbols__list);
Args.AddLastArg(CmdArgs, options::OPT_flat__namespace);
Args.AddAllArgs(CmdArgs, options::OPT_force__load);
Args.AddAllArgs(CmdArgs, options::OPT_headerpad__max__install__names);
Args.AddAllArgs(CmdArgs, options::OPT_image__base);
Args.AddAllArgs(CmdArgs, options::OPT_init);
// Add the deployment target.
MachOTC.addMinVersionArgs(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_nomultidefs);
Args.AddLastArg(CmdArgs, options::OPT_multi__module);
Args.AddLastArg(CmdArgs, options::OPT_single__module);
Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined);
Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused);
if (const Arg *A =
Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
options::OPT_fno_pie, options::OPT_fno_PIE)) {
if (A->getOption().matches(options::OPT_fpie) ||
A->getOption().matches(options::OPT_fPIE))
CmdArgs.push_back("-pie");
else
CmdArgs.push_back("-no_pie");
}
// For embed-bitcode, use -bitcode_bundle in the linker command.
if (C.getDriver().embedBitcodeEnabled()) {
// Check if the toolchain supports bitcode build flow.
if (MachOTC.SupportsEmbeddedBitcode()) {
CmdArgs.push_back("-bitcode_bundle");
if (C.getDriver().embedBitcodeMarkerOnly() && Version[0] >= 278) {
CmdArgs.push_back("-bitcode_process_mode");
CmdArgs.push_back("marker");
}
} else
D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain);
}
Args.AddLastArg(CmdArgs, options::OPT_prebind);
Args.AddLastArg(CmdArgs, options::OPT_noprebind);
Args.AddLastArg(CmdArgs, options::OPT_nofixprebinding);
Args.AddLastArg(CmdArgs, options::OPT_prebind__all__twolevel__modules);
Args.AddLastArg(CmdArgs, options::OPT_read__only__relocs);
Args.AddAllArgs(CmdArgs, options::OPT_sectcreate);
Args.AddAllArgs(CmdArgs, options::OPT_sectorder);
Args.AddAllArgs(CmdArgs, options::OPT_seg1addr);
Args.AddAllArgs(CmdArgs, options::OPT_segprot);
Args.AddAllArgs(CmdArgs, options::OPT_segaddr);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__only__addr);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__write__addr);
Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table);
Args.AddAllArgs(CmdArgs, options::OPT_seg__addr__table__filename);
Args.AddAllArgs(CmdArgs, options::OPT_sub__library);
Args.AddAllArgs(CmdArgs, options::OPT_sub__umbrella);
// Give --sysroot= preference over the Apple-specific behavior of also using
// --isysroot as the syslibroot.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
CmdArgs.push_back("-syslibroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
} else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
CmdArgs.push_back("-syslibroot");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace);
Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace__hints);
Args.AddAllArgs(CmdArgs, options::OPT_umbrella);
Args.AddAllArgs(CmdArgs, options::OPT_undefined);
Args.AddAllArgs(CmdArgs, options::OPT_unexported__symbols__list);
Args.AddAllArgs(CmdArgs, options::OPT_weak__reference__mismatches);
Args.AddLastArg(CmdArgs, options::OPT_X_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_y);
Args.AddLastArg(CmdArgs, options::OPT_w);
Args.AddAllArgs(CmdArgs, options::OPT_pagezero__size);
Args.AddAllArgs(CmdArgs, options::OPT_segs__read__);
Args.AddLastArg(CmdArgs, options::OPT_seglinkedit);
Args.AddLastArg(CmdArgs, options::OPT_noseglinkedit);
Args.AddAllArgs(CmdArgs, options::OPT_sectalign);
Args.AddAllArgs(CmdArgs, options::OPT_sectobjectsymbols);
Args.AddAllArgs(CmdArgs, options::OPT_segcreate);
Args.AddLastArg(CmdArgs, options::OPT_whyload);
Args.AddLastArg(CmdArgs, options::OPT_whatsloaded);
Args.AddAllArgs(CmdArgs, options::OPT_dylinker__install__name);
Args.AddLastArg(CmdArgs, options::OPT_dylinker);
Args.AddLastArg(CmdArgs, options::OPT_Mach);
}
/// \brief Determine whether we are linking the ObjC runtime.
static bool isObjCRuntimeLinked(const ArgList &Args) {
if (isObjCAutoRefCount(Args)) {
Args.ClaimAllArgs(options::OPT_fobjc_link_runtime);
return true;
}
return Args.hasArg(options::OPT_fobjc_link_runtime);
}
void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
assert(Output.getType() == types::TY_Image && "Invalid linker output type.");
// If the number of arguments surpasses the system limits, we will encode the
// input files in a separate file, shortening the command line. To this end,
// build a list of input file names that can be passed via a file with the
// -filelist linker option.
llvm::opt::ArgStringList InputFileList;
// The logic here is derived from gcc's behavior, most of which
// comes from specs (starting with link_command). Consult gcc for
// more information.
ArgStringList CmdArgs;
/// Hack(tm) to ignore linking errors when we are doing ARC migration.
if (Args.hasArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_migrate)) {
for (const auto &Arg : Args)
Arg->claim();
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("touch"));
CmdArgs.push_back(Output.getFilename());
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, None));
return;
}
// I'm not sure why this particular decomposition exists in gcc, but
// we follow suit for ease of comparison.
AddLinkArgs(C, Args, CmdArgs, Inputs);
// For LTO, pass the name of the optimization record file.
if (Args.hasFlag(options::OPT_fsave_optimization_record,
options::OPT_fno_save_optimization_record, false)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-lto-pass-remarks-output");
CmdArgs.push_back("-mllvm");
SmallString<128> F;
F = Output.getFilename();
F += ".opt.yaml";
CmdArgs.push_back(Args.MakeArgString(F));
if (getLastProfileUseArg(Args)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-lto-pass-remarks-with-hotness");
}
}
// It seems that the 'e' option is completely ignored for dynamic executables
// (the default), and with static executables, the last one wins, as expected.
Args.AddAllArgs(CmdArgs, {options::OPT_d_Flag, options::OPT_s, options::OPT_t,
options::OPT_Z_Flag, options::OPT_u_Group,
options::OPT_e, options::OPT_r});
// Forward -ObjC when either -ObjC or -ObjC++ is used, to force loading
// members of static archive libraries which implement Objective-C classes or
// categories.
if (Args.hasArg(options::OPT_ObjC) || Args.hasArg(options::OPT_ObjCXX))
CmdArgs.push_back("-ObjC");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);
// SafeStack requires its own runtime libraries.
// These libraries should be linked first, to make sure the
// __safestack_init constructor executes before everything else.
if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
"libclang_rt.safestack_osx.a",
/*AlwaysLink=*/true);
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
// Build the input file for -filelist (list of linker input files) in case we
// need it later.
for (const auto &II : Inputs) {
if (!II.isFilename()) {
// This is a linker input argument.
// We cannot mix input arguments and file names in a -filelist input, thus
// we prematurely stop our list (remaining files shall be passed as
// arguments).
if (InputFileList.size() > 0)
break;
continue;
}
InputFileList.push_back(II.getFilename());
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
addOpenMPRuntime(CmdArgs, getToolChain(), Args);
if (isObjCRuntimeLinked(Args) &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
// We use arclite library for both ARC and subscripting support.
getMachOToolChain().AddLinkARCArgs(Args, CmdArgs);
CmdArgs.push_back("-framework");
CmdArgs.push_back("Foundation");
// Link libobjc.
CmdArgs.push_back("-lobjc");
}
if (LinkingOutput) {
CmdArgs.push_back("-arch_multiple");
CmdArgs.push_back("-final_output");
CmdArgs.push_back(LinkingOutput);
}
if (Args.hasArg(options::OPT_fnested_functions))
CmdArgs.push_back("-allow_stack_execute");
getMachOToolChain().addProfileRTLibs(Args, CmdArgs);
if (unsigned Parallelism =
getLTOParallelism(Args, getToolChain().getDriver())) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(
Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism)));
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
if (getToolChain().getDriver().CCCIsCXX())
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
// link_ssp spec is empty.
// Let the tool chain choose which runtime library to link.
getMachOToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs);
// No need to do anything for pthreads. Claim argument to avoid warning.
Args.ClaimAllArgs(options::OPT_pthread);
Args.ClaimAllArgs(options::OPT_pthreads);
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
// endfile_spec is empty.
}
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_F);
// -iframework should be forwarded as -F.
for (const Arg *A : Args.filtered(options::OPT_iframework))
CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue()));
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
if (A->getValue() == StringRef("Accelerate")) {
CmdArgs.push_back("-framework");
CmdArgs.push_back("Accelerate");
}
}
}
const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
std::unique_ptr<Command> Cmd =
llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs);
Cmd->setInputFileList(std::move(InputFileList));
C.addCommand(std::move(Cmd));
}
void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-create");
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-output");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs) {
assert(II.isFilename() && "Unexpected lipo input.");
CmdArgs.push_back(II.getFilename());
}
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("lipo"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
const InputInfo &Input = Inputs[0];
assert(Input.isFilename() && "Unexpected dsymutil input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("--verify");
CmdArgs.push_back("--debug-info");
CmdArgs.push_back("--eh-frame");
CmdArgs.push_back("--quiet");
assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
const InputInfo &Input = Inputs[0];
assert(Input.isFilename() && "Unexpected verify input");
// Grabbing the output of the earlier dsymutil run.
CmdArgs.push_back(Input.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: ToolChain(D, Triple, Args) {
// We expect 'as', 'ld', etc. to be adjacent to our install dir.
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
}
/// Darwin - Darwin tool chain for i386 and x86_64.
Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: MachO(D, Triple, Args), TargetInitialized(false),
CudaInstallation(D, Triple, Args) {}
types::ID MachO::LookupTypeForExtension(StringRef Ext) const {
types::ID Ty = types::lookupTypeForExtension(Ext);
// Darwin always preprocesses assembly files (unless -x is used explicitly).
if (Ty == types::TY_PP_Asm)
return types::TY_Asm;
return Ty;
}
bool MachO::HasNativeLLVMSupport() const { return true; }
ToolChain::CXXStdlibType Darwin::GetDefaultCXXStdlibType() const {
// Default to use libc++ on OS X 10.9+ and iOS 7+.
if ((isTargetMacOS() && !isMacosxVersionLT(10, 9)) ||
(isTargetIOSBased() && !isIPhoneOSVersionLT(7, 0)) ||
isTargetWatchOSBased())
return ToolChain::CST_Libcxx;
return ToolChain::CST_Libstdcxx;
}
/// Darwin provides an ARC runtime starting in MacOS X 10.7 and iOS 5.0.
ObjCRuntime Darwin::getDefaultObjCRuntime(bool isNonFragile) const {
if (isTargetWatchOSBased())
return ObjCRuntime(ObjCRuntime::WatchOS, TargetVersion);
if (isTargetIOSBased())
return ObjCRuntime(ObjCRuntime::iOS, TargetVersion);
if (isNonFragile)
return ObjCRuntime(ObjCRuntime::MacOSX, TargetVersion);
return ObjCRuntime(ObjCRuntime::FragileMacOSX, TargetVersion);
}
/// Darwin provides a blocks runtime starting in MacOS X 10.6 and iOS 3.2.
bool Darwin::hasBlocksRuntime() const {
if (isTargetWatchOSBased())
return true;
else if (isTargetIOSBased())
return !isIPhoneOSVersionLT(3, 2);
else {
assert(isTargetMacOS() && "unexpected darwin target");
return !isMacosxVersionLT(10, 6);
}
}
void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
// This is just a MachO name translation routine and there's no
// way to join this into ARMTargetParser without breaking all
// other assumptions. Maybe MachO should consider standardising
// their nomenclature.
static const char *ArmMachOArchName(StringRef Arch) {
return llvm::StringSwitch<const char *>(Arch)
.Case("armv6k", "armv6")
.Case("armv6m", "armv6m")
.Case("armv5tej", "armv5")
.Case("xscale", "xscale")
.Case("armv4t", "armv4t")
.Case("armv7", "armv7")
.Cases("armv7a", "armv7-a", "armv7")
.Cases("armv7r", "armv7-r", "armv7")
.Cases("armv7em", "armv7e-m", "armv7em")
.Cases("armv7k", "armv7-k", "armv7k")
.Cases("armv7m", "armv7-m", "armv7m")
.Cases("armv7s", "armv7-s", "armv7s")
.Default(nullptr);
}
static const char *ArmMachOArchNameCPU(StringRef CPU) {
unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
if (ArchKind == llvm::ARM::AK_INVALID)
return nullptr;
StringRef Arch = llvm::ARM::getArchName(ArchKind);
// FIXME: Make sure this MachO triple mangling is really necessary.
// ARMv5* normalises to ARMv5.
if (Arch.startswith("armv5"))
Arch = Arch.substr(0, 5);
// ARMv6*, except ARMv6M, normalises to ARMv6.
else if (Arch.startswith("armv6") && !Arch.endswith("6m"))
Arch = Arch.substr(0, 5);
// ARMv7A normalises to ARMv7.
else if (Arch.endswith("v7a"))
Arch = Arch.substr(0, 5);
return Arch.data();
}
StringRef MachO::getMachOArchName(const ArgList &Args) const {
switch (getTriple().getArch()) {
default:
return getDefaultUniversalArchName();
case llvm::Triple::aarch64:
return "arm64";
case llvm::Triple::thumb:
case llvm::Triple::arm:
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ))
if (const char *Arch = ArmMachOArchName(A->getValue()))
return Arch;
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
if (const char *Arch = ArmMachOArchNameCPU(A->getValue()))
return Arch;
return "arm";
}
}
Darwin::~Darwin() {}
MachO::~MachO() {}
std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
// If the target isn't initialized (e.g., an unknown Darwin platform), return
// the default triple.
if (!isTargetInitialized())
return Triple.getTriple();
SmallString<16> Str;
if (isTargetWatchOSBased())
Str += "watchos";
else if (isTargetTvOSBased())
Str += "tvos";
else if (isTargetIOSBased())
Str += "ios";
else
Str += "macosx";
Str += getTargetVersion().getAsString();
Triple.setOSName(Str);
return Triple.getTriple();
}
Tool *MachO::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::LipoJobClass:
if (!Lipo)
Lipo.reset(new tools::darwin::Lipo(*this));
return Lipo.get();
case Action::DsymutilJobClass:
if (!Dsymutil)
Dsymutil.reset(new tools::darwin::Dsymutil(*this));
return Dsymutil.get();
case Action::VerifyDebugInfoJobClass:
if (!VerifyDebug)
VerifyDebug.reset(new tools::darwin::VerifyDebug(*this));
return VerifyDebug.get();
default:
return ToolChain::getTool(AC);
}
}
Tool *MachO::buildLinker() const { return new tools::darwin::Linker(*this); }
Tool *MachO::buildAssembler() const {
return new tools::darwin::Assembler(*this);
}
DarwinClang::DarwinClang(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Darwin(D, Triple, Args) {}
void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
// For modern targets, promote certain warnings to errors.
if (isTargetWatchOSBased() || getTriple().isArch64Bit()) {
// Always enable -Wdeprecated-objc-isa-usage and promote it
// to an error.
CC1Args.push_back("-Wdeprecated-objc-isa-usage");
CC1Args.push_back("-Werror=deprecated-objc-isa-usage");
// For iOS and watchOS, also error about implicit function declarations,
// as that can impact calling conventions.
if (!isTargetMacOS())
CC1Args.push_back("-Werror=implicit-function-declaration");
}
}
void DarwinClang::AddLinkARCArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Avoid linking compatibility stubs on i386 mac.
if (isTargetMacOS() && getArch() == llvm::Triple::x86)
return;
ObjCRuntime runtime = getDefaultObjCRuntime(/*nonfragile*/ true);
if ((runtime.hasNativeARC() || !isObjCAutoRefCount(Args)) &&
runtime.hasSubscripting())
return;
CmdArgs.push_back("-force_load");
SmallString<128> P(getDriver().ClangExecutable);
llvm::sys::path::remove_filename(P); // 'clang'
llvm::sys::path::remove_filename(P); // 'bin'
llvm::sys::path::append(P, "lib", "arc", "libarclite_");
// Mash in the platform.
if (isTargetWatchOSSimulator())
P += "watchsimulator";
else if (isTargetWatchOS())
P += "watchos";
else if (isTargetTvOSSimulator())
P += "appletvsimulator";
else if (isTargetTvOS())
P += "appletvos";
else if (isTargetIOSSimulator())
P += "iphonesimulator";
else if (isTargetIPhoneOS())
P += "iphoneos";
else
P += "macosx";
P += ".a";
CmdArgs.push_back(Args.MakeArgString(P));
}
unsigned DarwinClang::GetDefaultDwarfVersion() const {
// Default to use DWARF 2 on OS X 10.10 / iOS 8 and lower.
if ((isTargetMacOS() && isMacosxVersionLT(10, 11)) ||
(isTargetIOSBased() && isIPhoneOSVersionLT(9)))
return 2;
return 4;
}
void MachO::AddLinkRuntimeLib(const ArgList &Args, ArgStringList &CmdArgs,
StringRef DarwinLibName, bool AlwaysLink,
bool IsEmbedded, bool AddRPath) const {
SmallString<128> Dir(getDriver().ResourceDir);
llvm::sys::path::append(Dir, "lib", IsEmbedded ? "macho_embedded" : "darwin");
SmallString<128> P(Dir);
llvm::sys::path::append(P, DarwinLibName);
// For now, allow missing resource libraries to support developers who may
// not have compiler-rt checked out or integrated into their build (unless
// we explicitly force linking with this library).
if (AlwaysLink || getVFS().exists(P))
CmdArgs.push_back(Args.MakeArgString(P));
// Adding the rpaths might negatively interact when other rpaths are involved,
// so we should make sure we add the rpaths last, after all user-specified
// rpaths. This is currently true from this place, but we need to be
// careful if this function is ever called before user's rpaths are emitted.
if (AddRPath) {
assert(DarwinLibName.endswith(".dylib") && "must be a dynamic library");
// Add @executable_path to rpath to support having the dylib copied with
// the executable.
CmdArgs.push_back("-rpath");
CmdArgs.push_back("@executable_path");
// Add the path to the resource dir to rpath to support using the dylib
// from the default location without copying.
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(Dir));
}
}
void MachO::AddFuzzerLinkArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
// Go up one directory from Clang to find the libfuzzer archive file.
StringRef ParentDir = llvm::sys::path::parent_path(getDriver().InstalledDir);
SmallString<128> P(ParentDir);
llvm::sys::path::append(P, "lib", "libLLVMFuzzer.a");
CmdArgs.push_back(Args.MakeArgString(P));
// Libfuzzer is written in C++ and requires libcxx.
AddCXXStdlibLibArgs(Args, CmdArgs);
}
StringRef Darwin::getPlatformFamily() const {
switch (TargetPlatform) {
case DarwinPlatformKind::MacOS:
return "MacOSX";
case DarwinPlatformKind::IPhoneOS:
case DarwinPlatformKind::IPhoneOSSimulator:
return "iPhone";
case DarwinPlatformKind::TvOS:
case DarwinPlatformKind::TvOSSimulator:
return "AppleTV";
case DarwinPlatformKind::WatchOS:
case DarwinPlatformKind::WatchOSSimulator:
return "Watch";
}
llvm_unreachable("Unsupported platform");
}
StringRef Darwin::getSDKName(StringRef isysroot) {
// Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk
llvm::sys::path::const_iterator SDKDir;
auto BeginSDK = llvm::sys::path::begin(isysroot);
auto EndSDK = llvm::sys::path::end(isysroot);
for (auto IT = BeginSDK; IT != EndSDK; ++IT) {
StringRef SDK = *IT;
if (SDK.endswith(".sdk"))
return SDK.slice(0, SDK.size() - 4);
}
return "";
}
StringRef Darwin::getOSLibraryNameSuffix() const {
switch(TargetPlatform) {
case DarwinPlatformKind::MacOS:
return "osx";
case DarwinPlatformKind::IPhoneOS:
return "ios";
case DarwinPlatformKind::IPhoneOSSimulator:
return "iossim";
case DarwinPlatformKind::TvOS:
return "tvos";
case DarwinPlatformKind::TvOSSimulator:
return "tvossim";
case DarwinPlatformKind::WatchOS:
return "watchos";
case DarwinPlatformKind::WatchOSSimulator:
return "watchossim";
}
llvm_unreachable("Unsupported platform");
}
void Darwin::addProfileRTLibs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (!needsProfileRT(Args)) return;
AddLinkRuntimeLib(Args, CmdArgs, (Twine("libclang_rt.profile_") +
getOSLibraryNameSuffix() + ".a").str(),
/*AlwaysLink*/ true);
}
void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args,
ArgStringList &CmdArgs,
StringRef Sanitizer) const {
AddLinkRuntimeLib(
Args, CmdArgs,
(Twine("libclang_rt.") + Sanitizer + "_" +
getOSLibraryNameSuffix() + "_dynamic.dylib").str(),
/*AlwaysLink*/ true, /*IsEmbedded*/ false,
/*AddRPath*/ true);
}
ToolChain::RuntimeLibType DarwinClang::GetRuntimeLibType(
const ArgList &Args) const {
if (Arg* A = Args.getLastArg(options::OPT_rtlib_EQ)) {
StringRef Value = A->getValue();
if (Value != "compiler-rt")
getDriver().Diag(clang::diag::err_drv_unsupported_rtlib_for_platform)
<< Value << "darwin";
}
return ToolChain::RLT_CompilerRT;
}
void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Call once to ensure the diagnostic is printed if a wrong value was specified.
GetRuntimeLibType(Args);
// Darwin doesn't support real static executables, don't link any runtime
// libraries with -static.
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_fapple_kext) ||
Args.hasArg(options::OPT_mkernel))
return;
// Reject -static-libgcc for now, we can deal with this when and if someone
// cares. This is useful in situations where someone wants to statically link
// something like libstdc++, and needs its runtime support routines.
if (const Arg *A = Args.getLastArg(options::OPT_static_libgcc)) {
getDriver().Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args);
return;
}
const SanitizerArgs &Sanitize = getSanitizerArgs();
if (Sanitize.needsAsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "asan");
if (Sanitize.needsLsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "lsan");
if (Sanitize.needsUbsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan");
if (Sanitize.needsTsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan");
if (Sanitize.needsFuzzer() && !Args.hasArg(options::OPT_dynamiclib))
AddFuzzerLinkArgs(Args, CmdArgs);
if (Sanitize.needsStatsRt()) {
StringRef OS = isTargetMacOS() ? "osx" : "iossim";
AddLinkRuntimeLib(Args, CmdArgs,
(Twine("libclang_rt.stats_client_") + OS + ".a").str(),
/*AlwaysLink=*/true);
AddLinkSanitizerLibArgs(Args, CmdArgs, "stats");
}
if (Sanitize.needsEsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "esan");
// Otherwise link libSystem, then the dynamic runtime library, and finally any
// target specific static runtime library.
CmdArgs.push_back("-lSystem");
// Select the dynamic runtime library and the target specific static library.
if (isTargetWatchOSBased()) {
// We currently always need a static runtime library for watchOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.watchos.a");
} else if (isTargetTvOSBased()) {
// We currently always need a static runtime library for tvOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.tvos.a");
} else if (isTargetIOSBased()) {
// If we are compiling for the iOS simulator, don't attempt to link
// libgcc_s.1; it never went into the SDK.
// Linking against libgcc_s.1 isn't needed for iOS 5.0+
if (isIPhoneOSVersionLT(5, 0) && !isTargetIOSSimulator() &&
getTriple().getArch() != llvm::Triple::aarch64)
CmdArgs.push_back("-lgcc_s.1");
// We currently always need a static runtime library for iOS.
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.ios.a");
} else {
assert(isTargetMacOS() && "unexpected non MacOS platform");
// The dynamic runtime library was merged with libSystem for 10.6 and
// beyond; only 10.4 and 10.5 need an additional runtime library.
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-lgcc_s.10.4");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lgcc_s.10.5");
// Originally for OS X, we thought we would only need a static runtime
// library when targeting 10.4, to provide versions of the static functions
// which were omitted from 10.4.dylib. This led to the creation of the 10.4
// builtins library.
//
// Unfortunately, that turned out to not be true, because Darwin system
// headers can still use eprintf on i386, and it is not exported from
// libSystem. Therefore, we still must provide a runtime library just for
// the tiny tiny handful of projects that *might* use that symbol.
//
// Then over time, we figured out it was useful to add more things to the
// runtime, so we created libclang_rt.osx.a to provide new functions when
// deploying to old OS builds, and for a long time we had both the eprintf and
// osx builtin libraries, which just seems excessive. So with PR 28855, we
// are removing the eprintf library and expecting eprintf to be provided by
// the OS X builtins library.
if (isMacosxVersionLT(10, 5))
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.10.4.a");
else
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.osx.a");
}
}
/// Returns the most appropriate macOS target version for the current process.
///
/// If the macOS SDK version is the same or earlier than the system version,
/// then the SDK version is returned. Otherwise the system version is returned.
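/// For example, building with a macOS 10.13 SDK on a 10.12 host yields the
/// system's 10.12 version, while a 10.12 SDK on a 10.13 host yields the SDK's
/// "10.12".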
static std::string getSystemOrSDKMacOSVersion(StringRef MacOSSDKVersion) {
unsigned Major, Minor, Micro;
llvm::Triple SystemTriple(llvm::sys::getProcessTriple());
if (!SystemTriple.isMacOSX())
return MacOSSDKVersion;
SystemTriple.getMacOSXVersion(Major, Minor, Micro);
VersionTuple SystemVersion(Major, Minor, Micro);
bool HadExtra;
if (!Driver::GetReleaseVersion(MacOSSDKVersion, Major, Minor, Micro,
HadExtra))
return MacOSSDKVersion;
VersionTuple SDKVersion(Major, Minor, Micro);
if (SDKVersion > SystemVersion)
return SystemVersion.getAsString();
return MacOSSDKVersion;
}
void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
const OptTable &Opts = getDriver().getOpts();
// Support allowing the SDKROOT environment variable used by xcrun and other
// Xcode tools to define the default sysroot, by making it the default for
// isysroot.
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
// Warn if the path does not exist.
if (!getVFS().exists(A->getValue()))
getDriver().Diag(clang::diag::warn_missing_sysroot) << A->getValue();
} else {
if (char *env = ::getenv("SDKROOT")) {
// We only use this value as the default if it is an absolute path, exists,
// and is not the root path.
if (llvm::sys::path::is_absolute(env) && getVFS().exists(env) &&
StringRef(env) != "/") {
Args.append(Args.MakeSeparateArg(
nullptr, Opts.getOption(options::OPT_isysroot), env));
}
}
}
Arg *OSXVersion = Args.getLastArg(options::OPT_mmacosx_version_min_EQ);
Arg *iOSVersion = Args.getLastArg(options::OPT_miphoneos_version_min_EQ,
options::OPT_mios_simulator_version_min_EQ);
Arg *TvOSVersion =
Args.getLastArg(options::OPT_mtvos_version_min_EQ,
options::OPT_mtvos_simulator_version_min_EQ);
Arg *WatchOSVersion =
Args.getLastArg(options::OPT_mwatchos_version_min_EQ,
options::OPT_mwatchos_simulator_version_min_EQ);
unsigned Major, Minor, Micro;
bool HadExtra;
// The iOS deployment target that is explicitly specified via a command line
// option or an environment variable.
std::string ExplicitIOSDeploymentTargetStr;
if (iOSVersion)
ExplicitIOSDeploymentTargetStr = iOSVersion->getAsString(Args);
// Add a macro to differentiate between -m(iphone|tv|watch)os-version-min=X.Y
// and -m(iphone|tv|watch)simulator-version-min=X.Y.
if (Args.hasArg(options::OPT_mios_simulator_version_min_EQ) ||
Args.hasArg(options::OPT_mtvos_simulator_version_min_EQ) ||
Args.hasArg(options::OPT_mwatchos_simulator_version_min_EQ))
Args.append(Args.MakeSeparateArg(nullptr, Opts.getOption(options::OPT_D),
" __APPLE_EMBEDDED_SIMULATOR__=1"));
if (OSXVersion && (iOSVersion || TvOSVersion || WatchOSVersion)) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< OSXVersion->getAsString(Args)
<< (iOSVersion ? iOSVersion :
TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
iOSVersion = TvOSVersion = WatchOSVersion = nullptr;
} else if (iOSVersion && (TvOSVersion || WatchOSVersion)) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< iOSVersion->getAsString(Args)
<< (TvOSVersion ? TvOSVersion : WatchOSVersion)->getAsString(Args);
TvOSVersion = WatchOSVersion = nullptr;
} else if (TvOSVersion && WatchOSVersion) {
getDriver().Diag(diag::err_drv_argument_not_allowed_with)
<< TvOSVersion->getAsString(Args)
<< WatchOSVersion->getAsString(Args);
WatchOSVersion = nullptr;
} else if (!OSXVersion && !iOSVersion && !TvOSVersion && !WatchOSVersion) {
// If no deployment target was specified on the command line, check for
// environment defines.
std::string OSXTarget;
std::string iOSTarget;
std::string TvOSTarget;
std::string WatchOSTarget;
if (char *env = ::getenv("MACOSX_DEPLOYMENT_TARGET"))
OSXTarget = env;
if (char *env = ::getenv("IPHONEOS_DEPLOYMENT_TARGET"))
iOSTarget = env;
if (char *env = ::getenv("TVOS_DEPLOYMENT_TARGET"))
TvOSTarget = env;
if (char *env = ::getenv("WATCHOS_DEPLOYMENT_TARGET"))
WatchOSTarget = env;
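// For illustration: MACOSX_DEPLOYMENT_TARGET=10.12 in the environment ends up
// behaving like an explicit -mmacosx-version-min=10.12 (see the synthesized
// arguments appended below).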
if (!iOSTarget.empty())
ExplicitIOSDeploymentTargetStr =
std::string("IPHONEOS_DEPLOYMENT_TARGET=") + iOSTarget;
// If there is no command-line argument to specify the Target version and
// no environment variable defined, see if we can set the default based
// on -isysroot.
if (OSXTarget.empty() && iOSTarget.empty() && WatchOSTarget.empty() &&
TvOSTarget.empty() && Args.hasArg(options::OPT_isysroot)) {
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
StringRef isysroot = A->getValue();
StringRef SDK = getSDKName(isysroot);
if (SDK.size() > 0) {
// Slice the version number out.
// The version number spans from the first digit to the last digit in the
// SDK name.
size_t StartVer = SDK.find_first_of("0123456789");
size_t EndVer = SDK.find_last_of("0123456789");
if (StartVer != StringRef::npos && EndVer > StartVer) {
StringRef Version = SDK.slice(StartVer, EndVer + 1);
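// e.g. an SDK name of "MacOSX10.12.sdk" yields Version == "10.12", and
// "iPhoneOS11.0.sdk" yields "11.0".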
if (SDK.startswith("iPhoneOS") ||
SDK.startswith("iPhoneSimulator"))
iOSTarget = Version;
else if (SDK.startswith("MacOSX"))
OSXTarget = getSystemOrSDKMacOSVersion(Version);
else if (SDK.startswith("WatchOS") ||
SDK.startswith("WatchSimulator"))
WatchOSTarget = Version;
else if (SDK.startswith("AppleTVOS") ||
SDK.startswith("AppleTVSimulator"))
TvOSTarget = Version;
}
}
}
}
// If no OS targets have been specified, try to guess platform from -target
// or arch name and compute the version from the triple.
if (OSXTarget.empty() && iOSTarget.empty() && TvOSTarget.empty() &&
WatchOSTarget.empty()) {
llvm::Triple::OSType OSTy = llvm::Triple::UnknownOS;
// Set the OSTy based on -target if -arch isn't present.
if (Args.hasArg(options::OPT_target) && !Args.hasArg(options::OPT_arch)) {
OSTy = getTriple().getOS();
} else {
StringRef MachOArchName = getMachOArchName(Args);
if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
MachOArchName == "arm64")
OSTy = llvm::Triple::IOS;
else if (MachOArchName == "armv7k")
OSTy = llvm::Triple::WatchOS;
else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
MachOArchName != "armv7em")
OSTy = llvm::Triple::MacOSX;
}
if (OSTy != llvm::Triple::UnknownOS) {
unsigned Major, Minor, Micro;
std::string *OSTarget;
switch (OSTy) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
if (!getTriple().getMacOSXVersion(Major, Minor, Micro))
getDriver().Diag(diag::err_drv_invalid_darwin_version)
<< getTriple().getOSName();
OSTarget = &OSXTarget;
break;
case llvm::Triple::IOS:
getTriple().getiOSVersion(Major, Minor, Micro);
OSTarget = &iOSTarget;
break;
case llvm::Triple::TvOS:
getTriple().getOSVersion(Major, Minor, Micro);
OSTarget = &TvOSTarget;
break;
case llvm::Triple::WatchOS:
getTriple().getWatchOSVersion(Major, Minor, Micro);
OSTarget = &WatchOSTarget;
break;
default:
llvm_unreachable("Unexpected OS type");
break;
}
llvm::raw_string_ostream(*OSTarget) << Major << '.' << Minor << '.'
<< Micro;
}
}
// Do not allow conflicts with the watchOS target.
if (!WatchOSTarget.empty() && (!iOSTarget.empty() || !TvOSTarget.empty())) {
getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
<< "WATCHOS_DEPLOYMENT_TARGET"
<< (!iOSTarget.empty() ? "IPHONEOS_DEPLOYMENT_TARGET" :
"TVOS_DEPLOYMENT_TARGET");
}
// Do not allow conflicts with the tvOS target.
if (!TvOSTarget.empty() && !iOSTarget.empty()) {
getDriver().Diag(diag::err_drv_conflicting_deployment_targets)
<< "TVOS_DEPLOYMENT_TARGET"
<< "IPHONEOS_DEPLOYMENT_TARGET";
}
// Allow conflicts among OSX and iOS for historical reasons, but choose the
// default platform.
if (!OSXTarget.empty() && (!iOSTarget.empty() ||
!WatchOSTarget.empty() ||
!TvOSTarget.empty())) {
if (getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::thumb)
OSXTarget = "";
else
iOSTarget = WatchOSTarget = TvOSTarget = "";
}
if (!OSXTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mmacosx_version_min_EQ);
OSXVersion = Args.MakeJoinedArg(nullptr, O, OSXTarget);
Args.append(OSXVersion);
} else if (!iOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_miphoneos_version_min_EQ);
iOSVersion = Args.MakeJoinedArg(nullptr, O, iOSTarget);
Args.append(iOSVersion);
} else if (!TvOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mtvos_version_min_EQ);
TvOSVersion = Args.MakeJoinedArg(nullptr, O, TvOSTarget);
Args.append(TvOSVersion);
} else if (!WatchOSTarget.empty()) {
const Option O = Opts.getOption(options::OPT_mwatchos_version_min_EQ);
WatchOSVersion = Args.MakeJoinedArg(nullptr, O, WatchOSTarget);
Args.append(WatchOSVersion);
}
}
DarwinPlatformKind Platform;
if (OSXVersion)
Platform = MacOS;
else if (iOSVersion)
Platform = IPhoneOS;
else if (TvOSVersion)
Platform = TvOS;
else if (WatchOSVersion)
Platform = WatchOS;
else
llvm_unreachable("Unable to infer Darwin variant");
// Set the tool chain target information.
if (Platform == MacOS) {
assert((!iOSVersion && !TvOSVersion && !WatchOSVersion) &&
"Unknown target platform!");
if (!Driver::GetReleaseVersion(OSXVersion->getValue(), Major, Minor, Micro,
HadExtra) ||
HadExtra || Major != 10 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< OSXVersion->getAsString(Args);
} else if (Platform == IPhoneOS) {
assert(iOSVersion && "Unknown target platform!");
if (!Driver::GetReleaseVersion(iOSVersion->getValue(), Major, Minor, Micro,
HadExtra) ||
HadExtra || Major >= 100 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< iOSVersion->getAsString(Args);
// For 32-bit targets, the deployment target for iOS has to be earlier than
// iOS 11.
if (getTriple().isArch32Bit() && Major >= 11) {
// If the deployment target is explicitly specified, print a diagnostic.
if (!ExplicitIOSDeploymentTargetStr.empty()) {
getDriver().Diag(diag::warn_invalid_ios_deployment_target)
<< ExplicitIOSDeploymentTargetStr;
// Otherwise, set it to 10.99.99.
} else {
Major = 10;
Minor = 99;
Micro = 99;
}
}
} else if (Platform == TvOS) {
if (!Driver::GetReleaseVersion(TvOSVersion->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
Major >= 100 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< TvOSVersion->getAsString(Args);
} else if (Platform == WatchOS) {
if (!Driver::GetReleaseVersion(WatchOSVersion->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
Major >= 10 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< WatchOSVersion->getAsString(Args);
} else
llvm_unreachable("unknown kind of Darwin platform");
// Recognize iOS targets with an x86 architecture as the iOS simulator.
if (iOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
Platform = IPhoneOSSimulator;
if (TvOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
Platform = TvOSSimulator;
if (WatchOSVersion && (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64))
Platform = WatchOSSimulator;
setTarget(Platform, Major, Minor, Micro);
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
StringRef SDK = getSDKName(A->getValue());
if (SDK.size() > 0) {
size_t StartVer = SDK.find_first_of("0123456789");
StringRef SDKName = SDK.slice(0, StartVer);
if (!SDKName.startswith(getPlatformFamily()))
getDriver().Diag(diag::warn_incompatible_sysroot)
<< SDKName << getPlatformFamily();
}
}
}
void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CXXStdlibType Type = GetCXXStdlibType(Args);
switch (Type) {
case ToolChain::CST_Libcxx:
CmdArgs.push_back("-lc++");
break;
case ToolChain::CST_Libstdcxx:
// Unfortunately, -lstdc++ doesn't always exist in the standard search path;
// it was previously found in the gcc lib dir. However, for all the Darwin
// platforms we care about it was -lstdc++.6, so we search for that
// explicitly if we can't see an obvious -lstdc++ candidate.
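// For illustration: given -isysroot /SDK, we try /SDK/usr/lib/libstdc++.dylib
// first and, failing that, /SDK/usr/lib/libstdc++.6.dylib, before falling
// back to a plain -lstdc++ for the linker to resolve.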
// Check in the sysroot first.
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
SmallString<128> P(A->getValue());
llvm::sys::path::append(P, "usr", "lib", "libstdc++.dylib");
if (!getVFS().exists(P)) {
llvm::sys::path::remove_filename(P);
llvm::sys::path::append(P, "libstdc++.6.dylib");
if (getVFS().exists(P)) {
CmdArgs.push_back(Args.MakeArgString(P));
return;
}
}
}
// Otherwise, look in the root.
// FIXME: This should be removed someday when we don't have to care about
// 10.6 and earlier, where /usr/lib/libstdc++.dylib does not exist.
if (!getVFS().exists("/usr/lib/libstdc++.dylib") &&
getVFS().exists("/usr/lib/libstdc++.6.dylib")) {
CmdArgs.push_back("/usr/lib/libstdc++.6.dylib");
return;
}
// Otherwise, let the linker search.
CmdArgs.push_back("-lstdc++");
break;
}
}
void DarwinClang::AddCCKextLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// For Darwin platforms, use the compiler-rt-based support library
// instead of the gcc-provided one (which, incidentally, is only present
// in the gcc lib dir, making it hard to find).
SmallString<128> P(getDriver().ResourceDir);
llvm::sys::path::append(P, "lib", "darwin");
// Use the newer cc_kext for iOS ARM after 6.0.
if (isTargetWatchOS()) {
llvm::sys::path::append(P, "libclang_rt.cc_kext_watchos.a");
} else if (isTargetTvOS()) {
llvm::sys::path::append(P, "libclang_rt.cc_kext_tvos.a");
} else if (isTargetIPhoneOS()) {
llvm::sys::path::append(P, "libclang_rt.cc_kext_ios.a");
} else {
llvm::sys::path::append(P, "libclang_rt.cc_kext.a");
}
// For now, allow missing resource libraries to support developers who may
// not have compiler-rt checked out or integrated into their build.
if (getVFS().exists(P))
CmdArgs.push_back(Args.MakeArgString(P));
}
DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
StringRef BoundArch,
Action::OffloadKind) const {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// FIXME: We really want to get out of the tool chain level argument
// translation business, as it makes the driver functionality much
// more opaque. For now, we follow gcc closely solely for the
// purpose of easily achieving feature parity & testability. Once we
// have something that works, we should reevaluate each translation
// and try to push it down into tool specific logic.
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches either the toolchain
// triple arch, or the arch being bound.
llvm::Triple::ArchType XarchArch =
tools::darwin::getArchTypeForMachOArchName(A->getValue(0));
if (!(XarchArch == getArch() ||
(!BoundArch.empty() &&
XarchArch ==
tools::darwin::getArchTypeForMachOArchName(BoundArch))))
continue;
Arg *OriginalArg = A;
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
unsigned Prev = Index;
std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
// If the argument parsing failed or more than one argument was
// consumed, the -Xarch_ argument's parameter tried to consume
// extra arguments. Emit an error and ignore.
//
// We also want to disallow any options which would alter the
// driver behavior; that isn't going to work in our model. We
// use isDriverOption() as an approximation, although things
// like -O4 are going to slip through.
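// For illustration (hypothetical invocation): -Xarch_x86_64 -MF would make
// -MF consume a following argument, so it is rejected here, while options
// carrying the DriverOption flag are rejected by the check below.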
if (!XarchArg || Index > Prev + 1) {
getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
<< A->getAsString(Args);
continue;
} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
<< A->getAsString(Args);
continue;
}
XarchArg->setBaseArg(A);
A = XarchArg.release();
DAL->AddSynthesizedArg(A);
// Linker input arguments require custom handling. The problem is that we
// have already constructed the phase actions, so we cannot treat them as
// "input arguments".
if (A->getOption().hasFlag(options::LinkerInput)) {
// Convert the argument into individual Zlinker_input_args.
for (const char *Value : A->getValues()) {
DAL->AddSeparateArg(
OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
}
continue;
}
}
// Sob. This is strictly gcc compatible for the time being. Apple
// gcc translates options twice, which means that self-expanding
// options add duplicates.
switch ((options::ID)A->getOption().getID()) {
default:
DAL->append(A);
break;
case options::OPT_mkernel:
case options::OPT_fapple_kext:
DAL->append(A);
DAL->AddFlagArg(A, Opts.getOption(options::OPT_static));
break;
case options::OPT_dependency_file:
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF), A->getValue());
break;
case options::OPT_gfull:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
DAL->AddFlagArg(
A, Opts.getOption(options::OPT_fno_eliminate_unused_debug_symbols));
break;
case options::OPT_gused:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
DAL->AddFlagArg(
A, Opts.getOption(options::OPT_feliminate_unused_debug_symbols));
break;
case options::OPT_shared:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_dynamiclib));
break;
case options::OPT_fconstant_cfstrings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mconstant_cfstrings));
break;
case options::OPT_fno_constant_cfstrings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mno_constant_cfstrings));
break;
case options::OPT_Wnonportable_cfstrings:
DAL->AddFlagArg(A,
Opts.getOption(options::OPT_mwarn_nonportable_cfstrings));
break;
case options::OPT_Wno_nonportable_cfstrings:
DAL->AddFlagArg(
A, Opts.getOption(options::OPT_mno_warn_nonportable_cfstrings));
break;
case options::OPT_fpascal_strings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mpascal_strings));
break;
case options::OPT_fno_pascal_strings:
DAL->AddFlagArg(A, Opts.getOption(options::OPT_mno_pascal_strings));
break;
}
}
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64)
if (!Args.hasArgNoClaim(options::OPT_mtune_EQ))
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mtune_EQ),
"core2");
// Add the arch options based on the particular spelling of -arch, to match
// how the driver driver works.
if (!BoundArch.empty()) {
StringRef Name = BoundArch;
const Option MCpu = Opts.getOption(options::OPT_mcpu_EQ);
const Option MArch = Opts.getOption(clang::driver::options::OPT_march_EQ);
// This code must be kept in sync with LLVM's getArchTypeForDarwinArch,
// which defines the list of which architectures we accept.
if (Name == "ppc")
;
else if (Name == "ppc601")
DAL->AddJoinedArg(nullptr, MCpu, "601");
else if (Name == "ppc603")
DAL->AddJoinedArg(nullptr, MCpu, "603");
else if (Name == "ppc604")
DAL->AddJoinedArg(nullptr, MCpu, "604");
else if (Name == "ppc604e")
DAL->AddJoinedArg(nullptr, MCpu, "604e");
else if (Name == "ppc750")
DAL->AddJoinedArg(nullptr, MCpu, "750");
else if (Name == "ppc7400")
DAL->AddJoinedArg(nullptr, MCpu, "7400");
else if (Name == "ppc7450")
DAL->AddJoinedArg(nullptr, MCpu, "7450");
else if (Name == "ppc970")
DAL->AddJoinedArg(nullptr, MCpu, "970");
else if (Name == "ppc64" || Name == "ppc64le")
DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_m64));
else if (Name == "i386")
;
else if (Name == "i486")
DAL->AddJoinedArg(nullptr, MArch, "i486");
else if (Name == "i586")
DAL->AddJoinedArg(nullptr, MArch, "i586");
else if (Name == "i686")
DAL->AddJoinedArg(nullptr, MArch, "i686");
else if (Name == "pentium")
DAL->AddJoinedArg(nullptr, MArch, "pentium");
else if (Name == "pentium2")
DAL->AddJoinedArg(nullptr, MArch, "pentium2");
else if (Name == "pentpro")
DAL->AddJoinedArg(nullptr, MArch, "pentiumpro");
else if (Name == "pentIIm3")
DAL->AddJoinedArg(nullptr, MArch, "pentium2");
else if (Name == "x86_64")
DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_m64));
else if (Name == "x86_64h") {
DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_m64));
DAL->AddJoinedArg(nullptr, MArch, "x86_64h");
} else if (Name == "arm")
DAL->AddJoinedArg(nullptr, MArch, "armv4t");
else if (Name == "armv4t")
DAL->AddJoinedArg(nullptr, MArch, "armv4t");
else if (Name == "armv5")
DAL->AddJoinedArg(nullptr, MArch, "armv5tej");
else if (Name == "xscale")
DAL->AddJoinedArg(nullptr, MArch, "xscale");
else if (Name == "armv6")
DAL->AddJoinedArg(nullptr, MArch, "armv6k");
else if (Name == "armv6m")
DAL->AddJoinedArg(nullptr, MArch, "armv6m");
else if (Name == "armv7")
DAL->AddJoinedArg(nullptr, MArch, "armv7a");
else if (Name == "armv7em")
DAL->AddJoinedArg(nullptr, MArch, "armv7em");
else if (Name == "armv7k")
DAL->AddJoinedArg(nullptr, MArch, "armv7k");
else if (Name == "armv7m")
DAL->AddJoinedArg(nullptr, MArch, "armv7m");
else if (Name == "armv7s")
DAL->AddJoinedArg(nullptr, MArch, "armv7s");
}
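// For illustration: "-arch armv7" binds -march=armv7a above, and
// "-arch x86_64h" adds both -m64 and -march=x86_64h.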
return DAL;
}
void MachO::AddLinkRuntimeLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Embedded targets are simple at the moment: they do not support sanitizers,
// and they need a different library for each member of the product
// { static, PIC } x { hard-float, soft-float }.
llvm::SmallString<32> CompilerRT = StringRef("libclang_rt.");
CompilerRT +=
(tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard)
? "hard"
: "soft";
CompilerRT += Args.hasArg(options::OPT_fPIC) ? "_pic.a" : "_static.a";
AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true);
}
bool Darwin::isAlignedAllocationUnavailable() const {
switch (TargetPlatform) {
case MacOS: // Earlier than 10.13.
return TargetVersion < VersionTuple(10U, 13U, 0U);
case IPhoneOS:
case IPhoneOSSimulator:
case TvOS:
case TvOSSimulator: // Earlier than 11.0.
return TargetVersion < VersionTuple(11U, 0U, 0U);
case WatchOS:
case WatchOSSimulator: // Earlier than 4.0.
return TargetVersion < VersionTuple(4U, 0U, 0U);
}
llvm_unreachable("Unsupported platform");
}
void Darwin::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const {
if (isAlignedAllocationUnavailable())
CC1Args.push_back("-faligned-alloc-unavailable");
}
DerivedArgList *
Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
// First get the generic Apple args, before moving onto Darwin-specific ones.
DerivedArgList *DAL =
MachO::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
const OptTable &Opts = getDriver().getOpts();
// If no architecture is bound, none of the translations here are relevant.
if (BoundArch.empty())
return DAL;
// Add an explicit version min argument for the deployment target. We do this
// after argument translation because -Xarch_ arguments may add a version min
// argument.
AddDeploymentTarget(*DAL);
// For iOS 6, undo the translation to add -static for -mkernel/-fapple-kext.
// FIXME: It would be far better to avoid inserting those -static arguments,
// but we can't check the deployment target in the translation code until
// it is set here.
if (isTargetWatchOSBased() ||
(isTargetIOSBased() && !isIPhoneOSVersionLT(6, 0))) {
for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie; ) {
Arg *A = *it;
++it;
if (A->getOption().getID() != options::OPT_mkernel &&
A->getOption().getID() != options::OPT_fapple_kext)
continue;
assert(it != ie && "unexpected argument translation");
A = *it;
assert(A->getOption().getID() == options::OPT_static &&
"missing expected -static argument");
*it = nullptr;
++it;
}
}
if (!Args.getLastArg(options::OPT_stdlib_EQ) &&
GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_stdlib_EQ),
"libc++");
// Validate the C++ standard library choice.
CXXStdlibType Type = GetCXXStdlibType(*DAL);
if (Type == ToolChain::CST_Libcxx) {
// Check whether the target provides libc++.
StringRef where;
// Complain about targeting iOS < 5.0 in any way.
if (isTargetIOSBased() && isIPhoneOSVersionLT(5, 0))
where = "iOS 5.0";
if (where != StringRef()) {
getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment) << where;
}
}
auto Arch = tools::darwin::getArchTypeForMachOArchName(BoundArch);
if ((Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)) {
if (Args.hasFlag(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer, false))
getDriver().Diag(clang::diag::warn_drv_unsupported_opt_for_target)
<< "-fomit-frame-pointer" << BoundArch;
}
return DAL;
}
bool MachO::IsUnwindTablesDefault(const ArgList &Args) const {
- return !UseSjLjExceptions(Args);
+ // Unwind tables are not emitted if -fno-exceptions is supplied (except when
+ // targeting x86_64).
+ return getArch() == llvm::Triple::x86_64 ||
+ (!UseSjLjExceptions(Args) &&
+ Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
+ true));
}
bool MachO::UseDwarfDebugFlags() const {
if (const char *S = ::getenv("RC_DEBUG_OPTIONS"))
return S[0] != '\0';
return false;
}
bool Darwin::UseSjLjExceptions(const ArgList &Args) const {
// Darwin uses SjLj exceptions on ARM.
if (getTriple().getArch() != llvm::Triple::arm &&
getTriple().getArch() != llvm::Triple::thumb)
return false;
// Only watchOS uses the new DWARF/Compact unwinding method.
llvm::Triple Triple(ComputeLLVMTriple(Args));
return !Triple.isWatchABI();
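// e.g. armv7 iOS keeps SjLj exceptions, while armv7k watchOS (a watch ABI)
// returns false here and uses DWARF/compact unwinding instead.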
}
bool Darwin::SupportsEmbeddedBitcode() const {
assert(TargetInitialized && "Target not initialized!");
if (isTargetIPhoneOS() && isIPhoneOSVersionLT(6, 0))
return false;
return true;
}
bool MachO::isPICDefault() const { return true; }
bool MachO::isPIEDefault() const { return false; }
bool MachO::isPICDefaultForced() const {
return (getArch() == llvm::Triple::x86_64 ||
getArch() == llvm::Triple::aarch64);
}
bool MachO::SupportsProfiling() const {
// Profiling instrumentation is only supported on x86.
return getArch() == llvm::Triple::x86 || getArch() == llvm::Triple::x86_64;
}
void Darwin::addMinVersionArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
VersionTuple TargetVersion = getTargetVersion();
if (isTargetWatchOS())
CmdArgs.push_back("-watchos_version_min");
else if (isTargetWatchOSSimulator())
CmdArgs.push_back("-watchos_simulator_version_min");
else if (isTargetTvOS())
CmdArgs.push_back("-tvos_version_min");
else if (isTargetTvOSSimulator())
CmdArgs.push_back("-tvos_simulator_version_min");
else if (isTargetIOSSimulator())
CmdArgs.push_back("-ios_simulator_version_min");
else if (isTargetIOSBased())
CmdArgs.push_back("-iphoneos_version_min");
else {
assert(isTargetMacOS() && "unexpected target");
CmdArgs.push_back("-macosx_version_min");
}
CmdArgs.push_back(Args.MakeArgString(TargetVersion.getAsString()));
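// For illustration: targeting macOS yields "-macosx_version_min" followed by
// the rendered target version, and the iOS simulator yields
// "-ios_simulator_version_min" with its version.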
}
void Darwin::addStartObjectFileArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Derived from startfile spec.
if (Args.hasArg(options::OPT_dynamiclib)) {
// Derived from darwin_dylib1 spec.
if (isTargetWatchOSBased()) {
; // watchOS does not need dylib1.o.
} else if (isTargetIOSSimulator()) {
; // iOS simulator does not need dylib1.o.
} else if (isTargetIPhoneOS()) {
if (isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-ldylib1.o");
} else {
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-ldylib1.o");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-ldylib1.10.5.o");
}
} else {
if (Args.hasArg(options::OPT_bundle)) {
if (!Args.hasArg(options::OPT_static)) {
// Derived from darwin_bundle1 spec.
if (isTargetWatchOSBased()) {
; // watchOS does not need bundle1.o.
} else if (isTargetIOSSimulator()) {
; // iOS simulator does not need bundle1.o.
} else if (isTargetIPhoneOS()) {
if (isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-lbundle1.o");
} else {
if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lbundle1.o");
}
}
} else {
if (Args.hasArg(options::OPT_pg) && SupportsProfiling()) {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_object) ||
Args.hasArg(options::OPT_preload)) {
CmdArgs.push_back("-lgcrt0.o");
} else {
CmdArgs.push_back("-lgcrt1.o");
// darwin_crt2 spec is empty.
}
// By default on OS X 10.8 and later, we don't link with a crt1.o
// file and the linker knows to use _main as the entry point. But,
// when compiling with -pg, we need to link with the gcrt1.o file,
// so pass the -no_new_main option to tell the linker to use the
// "start" symbol as the entry point.
if (isTargetMacOS() && !isMacosxVersionLT(10, 8))
CmdArgs.push_back("-no_new_main");
} else {
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_object) ||
Args.hasArg(options::OPT_preload)) {
CmdArgs.push_back("-lcrt0.o");
} else {
// Derived from darwin_crt1 spec.
if (isTargetWatchOSBased()) {
; // watchOS does not need crt1.o.
} else if (isTargetIOSSimulator()) {
; // iOS simulator does not need crt1.o.
} else if (isTargetIPhoneOS()) {
if (getArch() == llvm::Triple::aarch64)
; // iOS does not need any crt1 files for arm64
else if (isIPhoneOSVersionLT(3, 1))
CmdArgs.push_back("-lcrt1.o");
else if (isIPhoneOSVersionLT(6, 0))
CmdArgs.push_back("-lcrt1.3.1.o");
} else {
if (isMacosxVersionLT(10, 5))
CmdArgs.push_back("-lcrt1.o");
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lcrt1.10.5.o");
else if (isMacosxVersionLT(10, 8))
CmdArgs.push_back("-lcrt1.10.6.o");
// darwin_crt2 spec is empty.
}
}
}
}
}
if (!isTargetIPhoneOS() && Args.hasArg(options::OPT_shared_libgcc) &&
!isTargetWatchOS() &&
isMacosxVersionLT(10, 5)) {
const char *Str = Args.MakeArgString(GetFilePath("crt3.o"));
CmdArgs.push_back(Str);
}
}
bool Darwin::SupportsObjCGC() const { return isTargetMacOS(); }
void Darwin::CheckObjCARC() const {
if (isTargetIOSBased() || isTargetWatchOSBased() ||
(isTargetMacOS() && !isMacosxVersionLT(10, 6)))
return;
getDriver().Diag(diag::err_arc_unsupported_on_toolchain);
}
SanitizerMask Darwin::getSupportedSanitizers() const {
const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
SanitizerMask Res = ToolChain::getSupportedSanitizers();
Res |= SanitizerKind::Address;
Res |= SanitizerKind::Leak;
Res |= SanitizerKind::Fuzzer;
if (isTargetMacOS()) {
if (!isMacosxVersionLT(10, 9))
Res |= SanitizerKind::Vptr;
Res |= SanitizerKind::SafeStack;
if (IsX86_64)
Res |= SanitizerKind::Thread;
} else if (isTargetIOSSimulator() || isTargetTvOSSimulator()) {
if (IsX86_64)
Res |= SanitizerKind::Thread;
}
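// For illustration: on x86_64 macOS 10.9 or later this yields Address, Leak,
// Fuzzer, Vptr, SafeStack, and Thread.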
return Res;
}
void Darwin::printVerboseInfo(raw_ostream &OS) const {
CudaInstallation.print(OS);
}
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.cpp (revision 322855)
@@ -1,1426 +1,1463 @@
//===--- ToolChains.cpp - ToolChain Implementations -----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "MSVC.h"
#include "CommonArgs.h"
#include "Darwin.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include <cstdio>
// Include the necessary headers to interface with the Windows registry and
// environment.
#if defined(LLVM_ON_WIN32)
#define USE_WIN32
#endif
#ifdef USE_WIN32
#define WIN32_LEAN_AND_MEAN
#define NOGDI
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
#ifdef _MSC_VER
// Don't support SetupApi on MinGW.
#define USE_MSVC_SETUP_API
// Make sure this comes before MSVCSetupApi.h
#include <comdef.h>
#include "MSVCSetupApi.h"
#include "llvm/Support/COM.h"
_COM_SMARTPTR_TYPEDEF(ISetupConfiguration, __uuidof(ISetupConfiguration));
_COM_SMARTPTR_TYPEDEF(ISetupConfiguration2, __uuidof(ISetupConfiguration2));
_COM_SMARTPTR_TYPEDEF(ISetupHelper, __uuidof(ISetupHelper));
_COM_SMARTPTR_TYPEDEF(IEnumSetupInstances, __uuidof(IEnumSetupInstances));
_COM_SMARTPTR_TYPEDEF(ISetupInstance, __uuidof(ISetupInstance));
_COM_SMARTPTR_TYPEDEF(ISetupInstance2, __uuidof(ISetupInstance2));
#endif
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
// Defined below.
// Forward declare this so there aren't too many things above the constructor.
static bool getSystemRegistryString(const char *keyPath, const char *valueName,
std::string &value, std::string *phValue);
// Check various environment variables to try and find a toolchain.
static bool findVCToolChainViaEnvironment(std::string &Path,
- bool &IsVS2017OrNewer) {
+ MSVCToolChain::ToolsetLayout &VSLayout) {
// These variables are typically set by vcvarsall.bat
// when launching a developer command prompt.
if (llvm::Optional<std::string> VCToolsInstallDir =
llvm::sys::Process::GetEnv("VCToolsInstallDir")) {
// This is only set by newer Visual Studios, and it leads straight to
// the toolchain directory.
Path = std::move(*VCToolsInstallDir);
- IsVS2017OrNewer = true;
+ VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
return true;
}
if (llvm::Optional<std::string> VCInstallDir =
llvm::sys::Process::GetEnv("VCINSTALLDIR")) {
// If the previous variable isn't set but this one is, then we've found
// an older Visual Studio. This variable is set by newer Visual Studios too,
// so this check has to appear second.
// In older Visual Studios, the VC directory is the toolchain.
Path = std::move(*VCInstallDir);
- IsVS2017OrNewer = false;
+ VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
return true;
}
// We couldn't find any VC environment variables. Let's walk through PATH and
// see if it leads us to a VC toolchain bin directory. If it does, pick the
// first one that we find.
if (llvm::Optional<std::string> PathEnv =
llvm::sys::Process::GetEnv("PATH")) {
llvm::SmallVector<llvm::StringRef, 8> PathEntries;
llvm::StringRef(*PathEnv).split(PathEntries, llvm::sys::EnvPathSeparator);
for (llvm::StringRef PathEntry : PathEntries) {
if (PathEntry.empty())
continue;
llvm::SmallString<256> ExeTestPath;
// If cl.exe doesn't exist, then this definitely isn't a VC toolchain.
ExeTestPath = PathEntry;
llvm::sys::path::append(ExeTestPath, "cl.exe");
if (!llvm::sys::fs::exists(ExeTestPath))
continue;
// cl.exe existing isn't a conclusive test for a VC toolchain; clang also
// has a cl.exe. So let's check for link.exe too.
ExeTestPath = PathEntry;
llvm::sys::path::append(ExeTestPath, "link.exe");
if (!llvm::sys::fs::exists(ExeTestPath))
continue;
// whatever/VC/bin --> old toolchain, VC dir is toolchain dir.
llvm::StringRef TestPath = PathEntry;
bool IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
if (!IsBin) {
// Strip any architecture subdir like "amd64".
TestPath = llvm::sys::path::parent_path(TestPath);
IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin");
}
if (IsBin) {
llvm::StringRef ParentPath = llvm::sys::path::parent_path(TestPath);
- if (llvm::sys::path::filename(ParentPath) == "VC") {
+ llvm::StringRef ParentFilename = llvm::sys::path::filename(ParentPath);
+ if (ParentFilename == "VC") {
Path = ParentPath;
- IsVS2017OrNewer = false;
+ VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
return true;
}
+ if (ParentFilename == "x86ret" || ParentFilename == "x86chk"
+ || ParentFilename == "amd64ret" || ParentFilename == "amd64chk") {
+ Path = ParentPath;
+ VSLayout = MSVCToolChain::ToolsetLayout::DevDivInternal;
+ return true;
+ }
} else {
// This could be a new (>=VS2017) toolchain. If it is, we should find
// path components with these prefixes when walking backwards through
// the path.
// Note: empty strings match anything.
llvm::StringRef ExpectedPrefixes[] = {"", "Host", "bin", "",
"MSVC", "Tools", "VC"};
auto It = llvm::sys::path::rbegin(PathEntry);
auto End = llvm::sys::path::rend(PathEntry);
for (llvm::StringRef Prefix : ExpectedPrefixes) {
if (It == End)
goto NotAToolChain;
if (!It->startswith(Prefix))
goto NotAToolChain;
++It;
}
// We've found a new toolchain!
// Back up 3 times (/bin/Host/arch) to get the root path.
llvm::StringRef ToolChainPath(PathEntry);
for (int i = 0; i < 3; ++i)
ToolChainPath = llvm::sys::path::parent_path(ToolChainPath);
Path = ToolChainPath;
- IsVS2017OrNewer = true;
+ VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
return true;
}
NotAToolChain:
continue;
}
}
return false;
}
// Query the Setup Config server for installs, then pick the newest version
// and find its default VC toolchain.
// This is the preferred way to discover new Visual Studios, as they're no
// longer listed in the registry.
static bool findVCToolChainViaSetupConfig(std::string &Path,
- bool &IsVS2017OrNewer) {
+ MSVCToolChain::ToolsetLayout &VSLayout) {
#if !defined(USE_MSVC_SETUP_API)
return false;
#else
// FIXME: This really should be done once in the top-level program's main
// function, as it may have already been initialized with a different
// threading model otherwise.
llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::SingleThreaded);
HRESULT HR;
// _com_ptr_t will throw a _com_error if a COM call fails.
// The LLVM coding standards forbid exception handling, so we'll have to
// stop them from being thrown in the first place.
// The destructor will put the regular error handler back when we leave
// this scope.
struct SuppressCOMErrorsRAII {
static void __stdcall handler(HRESULT hr, IErrorInfo *perrinfo) {}
SuppressCOMErrorsRAII() { _set_com_error_handler(handler); }
~SuppressCOMErrorsRAII() { _set_com_error_handler(_com_raise_error); }
} COMErrorSuppressor;
ISetupConfigurationPtr Query;
HR = Query.CreateInstance(__uuidof(SetupConfiguration));
if (FAILED(HR))
return false;
IEnumSetupInstancesPtr EnumInstances;
HR = ISetupConfiguration2Ptr(Query)->EnumAllInstances(&EnumInstances);
if (FAILED(HR))
return false;
ISetupInstancePtr Instance;
HR = EnumInstances->Next(1, &Instance, nullptr);
if (HR != S_OK)
return false;
ISetupInstancePtr NewestInstance;
Optional<uint64_t> NewestVersionNum;
do {
bstr_t VersionString;
uint64_t VersionNum;
HR = Instance->GetInstallationVersion(VersionString.GetAddress());
if (FAILED(HR))
continue;
HR = ISetupHelperPtr(Query)->ParseVersion(VersionString, &VersionNum);
if (FAILED(HR))
continue;
if (!NewestVersionNum || (VersionNum > NewestVersionNum)) {
NewestInstance = Instance;
NewestVersionNum = VersionNum;
}
} while ((HR = EnumInstances->Next(1, &Instance, nullptr)) == S_OK);
if (!NewestInstance)
return false;
bstr_t VCPathWide;
HR = NewestInstance->ResolvePath(L"VC", VCPathWide.GetAddress());
if (FAILED(HR))
return false;
std::string VCRootPath;
llvm::convertWideToUTF8(std::wstring(VCPathWide), VCRootPath);
llvm::SmallString<256> ToolsVersionFilePath(VCRootPath);
llvm::sys::path::append(ToolsVersionFilePath, "Auxiliary", "Build",
"Microsoft.VCToolsVersion.default.txt");
auto ToolsVersionFile = llvm::MemoryBuffer::getFile(ToolsVersionFilePath);
if (!ToolsVersionFile)
return false;
llvm::SmallString<256> ToolchainPath(VCRootPath);
llvm::sys::path::append(ToolchainPath, "Tools", "MSVC",
ToolsVersionFile->get()->getBuffer().rtrim());
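// e.g. if Microsoft.VCToolsVersion.default.txt contains "14.11.25503", the
// toolchain path becomes <VCRoot>/Tools/MSVC/14.11.25503.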
if (!llvm::sys::fs::is_directory(ToolchainPath))
return false;
Path = ToolchainPath.str();
- IsVS2017OrNewer = true;
+ VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer;
return true;
#endif
}
// Look in the registry for Visual Studio installs, and use that to get
// a toolchain path. VS2017 and newer don't get added to the registry.
// So if we find something here, we know that it's an older version.
static bool findVCToolChainViaRegistry(std::string &Path,
- bool &IsVS2017OrNewer) {
+ MSVCToolChain::ToolsetLayout &VSLayout) {
std::string VSInstallPath;
if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)",
"InstallDir", VSInstallPath, nullptr) ||
getSystemRegistryString(R"(SOFTWARE\Microsoft\VCExpress\$VERSION)",
"InstallDir", VSInstallPath, nullptr)) {
if (!VSInstallPath.empty()) {
llvm::SmallString<256> VCPath(llvm::StringRef(
VSInstallPath.c_str(), VSInstallPath.find(R"(\Common7\IDE)")));
llvm::sys::path::append(VCPath, "VC");
Path = VCPath.str();
- IsVS2017OrNewer = false;
+ VSLayout = MSVCToolChain::ToolsetLayout::OlderVS;
return true;
}
}
return false;
}
// Try to find Exe from a Visual Studio distribution. This first tries to find
// an installed copy of Visual Studio and, failing that, looks in the PATH,
// making sure that whatever executable is found is not a same-named exe from
// clang itself, to prevent clang from falling back to itself.
static std::string FindVisualStudioExecutable(const ToolChain &TC,
const char *Exe) {
const auto &MSVC = static_cast<const toolchains::MSVCToolChain &>(TC);
SmallString<128> FilePath(MSVC.getSubDirectoryPath(
toolchains::MSVCToolChain::SubDirectoryType::Bin));
llvm::sys::path::append(FilePath, Exe);
return llvm::sys::fs::can_execute(FilePath) ? FilePath.str() : Exe;
}
void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
auto &TC = static_cast<const toolchains::MSVCToolChain &>(getToolChain());
assert((Output.isFilename() || Output.isNothing()) && "invalid output");
if (Output.isFilename())
CmdArgs.push_back(
Args.MakeArgString(std::string("-out:") + Output.getFilename()));
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) &&
!C.getDriver().IsCLMode())
CmdArgs.push_back("-defaultlib:libcmt");
if (!llvm::sys::Process::GetEnv("LIB")) {
// If the VC environment hasn't been configured (perhaps because the user
// did not run vcvarsall), try to build a consistent link environment. If
// the environment variable is set, however, assume the user knows what
// they're doing.
CmdArgs.push_back(Args.MakeArgString(
Twine("-libpath:") +
TC.getSubDirectoryPath(
toolchains::MSVCToolChain::SubDirectoryType::Lib)));
if (TC.useUniversalCRT()) {
std::string UniversalCRTLibPath;
if (TC.getUniversalCRTLibraryPath(UniversalCRTLibPath))
CmdArgs.push_back(
Args.MakeArgString(Twine("-libpath:") + UniversalCRTLibPath));
}
std::string WindowsSdkLibPath;
if (TC.getWindowsSDKLibraryPath(WindowsSdkLibPath))
CmdArgs.push_back(
Args.MakeArgString(std::string("-libpath:") + WindowsSdkLibPath));
}
if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L))
for (const auto &LibPath : Args.getAllArgValues(options::OPT_L))
CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath));
CmdArgs.push_back("-nologo");
if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7,
options::OPT__SLASH_Zd))
CmdArgs.push_back("-debug");
bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd,
options::OPT_shared);
if (DLL) {
CmdArgs.push_back(Args.MakeArgString("-dll"));
SmallString<128> ImplibName(Output.getFilename());
llvm::sys::path::replace_extension(ImplibName, "lib");
CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName));
}
if (TC.getSanitizerArgs().needsAsanRt()) {
CmdArgs.push_back(Args.MakeArgString("-debug"));
CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
if (TC.getSanitizerArgs().needsSharedAsanRt() ||
Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
// Make sure the dynamic runtime thunk is not optimized out at link time
// to ensure proper SEH handling.
CmdArgs.push_back(Args.MakeArgString(
TC.getArch() == llvm::Triple::x86
? "-include:___asan_seh_interceptor"
: "-include:__asan_seh_interceptor"));
// Make sure the linker considers all object files from the dynamic
// runtime thunk.
CmdArgs.push_back(Args.MakeArgString(std::string("-wholearchive:") +
TC.getCompilerRT(Args, "asan_dynamic_runtime_thunk")));
} else if (DLL) {
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
} else {
for (const auto &Lib : {"asan", "asan_cxx"}) {
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
// Make sure the linker considers all object files from the static lib.
// This is necessary because instrumented DLLs need access to all the
// interfaces exported by the static lib in the main executable.
CmdArgs.push_back(Args.MakeArgString(std::string("-wholearchive:") +
TC.getCompilerRT(Args, Lib)));
}
}
}
Args.AddAllArgValues(CmdArgs, options::OPT__SLASH_link);
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false)) {
CmdArgs.push_back("-nodefaultlib:vcomp.lib");
CmdArgs.push_back("-nodefaultlib:vcompd.lib");
CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") +
TC.getDriver().Dir + "/../lib"));
switch (TC.getDriver().getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
CmdArgs.push_back("-defaultlib:libomp.lib");
break;
case Driver::OMPRT_IOMP5:
CmdArgs.push_back("-defaultlib:libiomp5md.lib");
break;
case Driver::OMPRT_GOMP:
break;
case Driver::OMPRT_Unknown:
// Already diagnosed.
break;
}
}
// Add the compiler-rt library if it was explicitly
// specified as an argument to the --rtlib option.
if (!Args.hasArg(options::OPT_nostdlib)) {
AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args);
}
// Add filenames, libraries, and other linker inputs.
for (const auto &Input : Inputs) {
if (Input.isFilename()) {
CmdArgs.push_back(Input.getFilename());
continue;
}
const Arg &A = Input.getInputArg();
// Render -l options differently for the MSVC linker.
if (A.getOption().matches(options::OPT_l)) {
StringRef Lib = A.getValue();
const char *LinkLibArg;
if (Lib.endswith(".lib"))
LinkLibArg = Args.MakeArgString(Lib);
else
LinkLibArg = Args.MakeArgString(Lib + ".lib");
CmdArgs.push_back(LinkLibArg);
continue;
}
// Otherwise, this is some other kind of linker input option like -Wl, -z,
// or -L. Render it, even if MSVC doesn't understand it.
A.renderAsInput(Args, CmdArgs);
}
TC.addProfileRTLibs(Args, CmdArgs);
std::vector<const char *> Environment;
// We need to special-case some linker paths. In the case of lld, we need to
// translate 'lld' into 'lld-link', and in the case of the regular msvc
// linker, we need to use a special search algorithm.
llvm::SmallString<128> linkPath;
StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
if (Linker.equals_lower("lld"))
Linker = "lld-link";
if (Linker.equals_lower("link")) {
// If we're using the MSVC linker, it's not sufficient to just use link
// from the program PATH, because other environments like GnuWin32 install
// their own link.exe which may come first.
linkPath = FindVisualStudioExecutable(TC, "link.exe");
#ifdef USE_WIN32
// When cross-compiling with VS2017 or newer, link.exe expects to have
// its containing bin directory at the top of PATH, followed by the
// native target bin directory.
// e.g. when compiling for x86 on an x64 host, PATH should start with:
// /bin/HostX64/x86;/bin/HostX64/x64
+ // This doesn't attempt to handle ToolsetLayout::DevDivInternal.
if (TC.getIsVS2017OrNewer() &&
llvm::Triple(llvm::sys::getProcessTriple()).getArch() != TC.getArch()) {
auto HostArch = llvm::Triple(llvm::sys::getProcessTriple()).getArch();
auto EnvBlockWide =
std::unique_ptr<wchar_t[], decltype(&FreeEnvironmentStringsW)>(
GetEnvironmentStringsW(), FreeEnvironmentStringsW);
if (!EnvBlockWide)
goto SkipSettingEnvironment;
size_t EnvCount = 0;
size_t EnvBlockLen = 0;
while (EnvBlockWide[EnvBlockLen] != L'\0') {
++EnvCount;
EnvBlockLen += std::wcslen(&EnvBlockWide[EnvBlockLen]) +
1 /*string null-terminator*/;
}
++EnvBlockLen; // add the block null-terminator
std::string EnvBlock;
if (!llvm::convertUTF16ToUTF8String(
llvm::ArrayRef<char>(reinterpret_cast<char *>(EnvBlockWide.get()),
EnvBlockLen * sizeof(EnvBlockWide[0])),
EnvBlock))
goto SkipSettingEnvironment;
Environment.reserve(EnvCount);
// Now loop over each string in the block and copy them into the
// environment vector, adjusting the PATH variable as needed when we
// find it.
for (const char *Cursor = EnvBlock.data(); *Cursor != '\0';) {
llvm::StringRef EnvVar(Cursor);
if (EnvVar.startswith_lower("path=")) {
using SubDirectoryType = toolchains::MSVCToolChain::SubDirectoryType;
constexpr size_t PrefixLen = 5; // strlen("path=")
Environment.push_back(Args.MakeArgString(
EnvVar.substr(0, PrefixLen) +
TC.getSubDirectoryPath(SubDirectoryType::Bin) +
llvm::Twine(llvm::sys::EnvPathSeparator) +
TC.getSubDirectoryPath(SubDirectoryType::Bin, HostArch) +
(EnvVar.size() > PrefixLen
? llvm::Twine(llvm::sys::EnvPathSeparator) +
EnvVar.substr(PrefixLen)
: "")));
} else {
Environment.push_back(Args.MakeArgString(EnvVar));
}
Cursor += EnvVar.size() + 1 /*null-terminator*/;
}
}
SkipSettingEnvironment:;
#endif
} else {
linkPath = TC.GetProgramPath(Linker.str().c_str());
}
auto LinkCmd = llvm::make_unique<Command>(
JA, *this, Args.MakeArgString(linkPath), CmdArgs, Inputs);
if (!Environment.empty())
LinkCmd->setEnvironment(Environment);
C.addCommand(std::move(LinkCmd));
}
void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput));
}
std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
Compilation &C, const JobAction &JA, const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
CmdArgs.push_back("/nologo");
CmdArgs.push_back("/c"); // Compile only.
CmdArgs.push_back("/W0"); // No warnings.
// The goal is to be able to invoke this tool correctly based on
// any flag accepted by clang-cl.
// These are spelled the same way in clang and cl.exe.
Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I});
// Optimization level.
if (Arg *A = Args.getLastArg(options::OPT_fbuiltin, options::OPT_fno_builtin))
CmdArgs.push_back(A->getOption().getID() == options::OPT_fbuiltin ? "/Oi"
: "/Oi-");
if (Arg *A = Args.getLastArg(options::OPT_O, options::OPT_O0)) {
if (A->getOption().getID() == options::OPT_O0) {
CmdArgs.push_back("/Od");
} else {
CmdArgs.push_back("/Og");
StringRef OptLevel = A->getValue();
if (OptLevel == "s" || OptLevel == "z")
CmdArgs.push_back("/Os");
else
CmdArgs.push_back("/Ot");
CmdArgs.push_back("/Ob2");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer))
CmdArgs.push_back(A->getOption().getID() == options::OPT_fomit_frame_pointer
? "/Oy"
: "/Oy-");
if (!Args.hasArg(options::OPT_fwritable_strings))
CmdArgs.push_back("/GF");
// Flags for which clang-cl has an alias.
// FIXME: How can we ensure this stays in sync with relevant clang-cl options?
if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
/*default=*/false))
CmdArgs.push_back("/GR-");
if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
/*default=*/false))
CmdArgs.push_back("/GS-");
if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
options::OPT_fno_function_sections))
CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections
? "/Gy"
: "/Gy-");
if (Arg *A = Args.getLastArg(options::OPT_fdata_sections,
options::OPT_fno_data_sections))
CmdArgs.push_back(
A->getOption().getID() == options::OPT_fdata_sections ? "/Gw" : "/Gw-");
if (Args.hasArg(options::OPT_fsyntax_only))
CmdArgs.push_back("/Zs");
if (Args.hasArg(options::OPT_g_Flag, options::OPT_gline_tables_only,
options::OPT__SLASH_Z7))
CmdArgs.push_back("/Z7");
std::vector<std::string> Includes =
Args.getAllArgValues(options::OPT_include);
for (const auto &Include : Includes)
CmdArgs.push_back(Args.MakeArgString(std::string("/FI") + Include));
// Flags that can simply be passed through.
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl);
// The order of these flags is relevant, so pick the last one.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd,
options::OPT__SLASH_MT, options::OPT__SLASH_MTd))
A->render(Args, CmdArgs);
// Use MSVC's default threadsafe statics behaviour unless there was a flag.
if (Arg *A = Args.getLastArg(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics)) {
CmdArgs.push_back(A->getOption().getID() == options::OPT_fthreadsafe_statics
? "/Zc:threadSafeInit"
: "/Zc:threadSafeInit-");
}
// Pass through all unknown arguments so that the fallback command can see
// them too.
Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN);
// Input filename.
assert(Inputs.size() == 1);
const InputInfo &II = Inputs[0];
assert(II.getType() == types::TY_C || II.getType() == types::TY_CXX);
CmdArgs.push_back(II.getType() == types::TY_C ? "/Tc" : "/Tp");
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
// Output filename.
assert(Output.getType() == types::TY_Object);
const char *Fo =
Args.MakeArgString(std::string("/Fo") + Output.getFilename());
CmdArgs.push_back(Fo);
std::string Exec = FindVisualStudioExecutable(getToolChain(), "cl.exe");
return llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec),
CmdArgs, Inputs);
}
MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
// Check the environment first, since that's probably the user telling us
// what they want to use.
// Failing that, just try to find the newest Visual Studio version we can
// and use its default VC toolchain.
- findVCToolChainViaEnvironment(VCToolChainPath, IsVS2017OrNewer) ||
- findVCToolChainViaSetupConfig(VCToolChainPath, IsVS2017OrNewer) ||
- findVCToolChainViaRegistry(VCToolChainPath, IsVS2017OrNewer);
+ findVCToolChainViaEnvironment(VCToolChainPath, VSLayout) ||
+ findVCToolChainViaSetupConfig(VCToolChainPath, VSLayout) ||
+ findVCToolChainViaRegistry(VCToolChainPath, VSLayout);
}
Tool *MSVCToolChain::buildLinker() const {
if (VCToolChainPath.empty())
getDriver().Diag(clang::diag::warn_drv_msvc_not_found);
return new tools::visualstudio::Linker(*this);
}
Tool *MSVCToolChain::buildAssembler() const {
if (getTriple().isOSBinFormatMachO())
return new tools::darwin::Assembler(*this);
getDriver().Diag(clang::diag::err_no_external_assembler);
return nullptr;
}
bool MSVCToolChain::IsIntegratedAssemblerDefault() const {
return true;
}
bool MSVCToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
// Emit unwind tables by default on Win64. All non-x86_32 Windows platforms
// such as ARM and PPC actually require unwind tables, but LLVM doesn't know
// how to generate them yet.
// Don't emit unwind tables by default for MachO targets.
if (getTriple().isOSBinFormatMachO())
return false;
return getArch() == llvm::Triple::x86_64;
}
bool MSVCToolChain::isPICDefault() const {
return getArch() == llvm::Triple::x86_64;
}
bool MSVCToolChain::isPIEDefault() const {
return false;
}
bool MSVCToolChain::isPICDefaultForced() const {
return getArch() == llvm::Triple::x86_64;
}
void MSVCToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const {
CudaInstallation.print(OS);
}
// Windows SDKs and VC Toolchains group their contents into subdirectories based
// on the target architecture. This function converts an llvm::Triple::ArchType
// to the corresponding subdirectory name.
static const char *llvmArchToWindowsSDKArch(llvm::Triple::ArchType Arch) {
using ArchType = llvm::Triple::ArchType;
switch (Arch) {
case ArchType::x86:
return "x86";
case ArchType::x86_64:
return "x64";
case ArchType::arm:
return "arm";
default:
return "";
}
}
// Similar to the above function, but for Visual Studios before VS2017.
static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) {
using ArchType = llvm::Triple::ArchType;
switch (Arch) {
case ArchType::x86:
// x86 is default in legacy VC toolchains.
// e.g. x86 libs are directly in /lib as opposed to /lib/x86.
return "";
case ArchType::x86_64:
return "amd64";
case ArchType::arm:
return "arm";
default:
return "";
}
}
+// Similar to the above function, but for DevDiv internal builds.
+static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) {
+ using ArchType = llvm::Triple::ArchType;
+ switch (Arch) {
+ case ArchType::x86:
+ return "i386";
+ case ArchType::x86_64:
+ return "amd64";
+ case ArchType::arm:
+ return "arm";
+ default:
+ return "";
+ }
+}
+
// Get the path to a specific subdirectory in the current toolchain for
// a given target architecture.
// VS2017 changed the VC toolchain layout, so this should be used instead
// of hardcoding paths.
std::string
MSVCToolChain::getSubDirectoryPath(SubDirectoryType Type,
llvm::Triple::ArchType TargetArch) const {
+ const char *SubdirName;
+ const char *IncludeName;
+ switch (VSLayout) {
+ case ToolsetLayout::OlderVS:
+ SubdirName = llvmArchToLegacyVCArch(TargetArch);
+ IncludeName = "include";
+ break;
+ case ToolsetLayout::VS2017OrNewer:
+ SubdirName = llvmArchToWindowsSDKArch(TargetArch);
+ IncludeName = "include";
+ break;
+ case ToolsetLayout::DevDivInternal:
+ SubdirName = llvmArchToDevDivInternalArch(TargetArch);
+ IncludeName = "inc";
+ break;
+ }
+
llvm::SmallString<256> Path(VCToolChainPath);
switch (Type) {
case SubDirectoryType::Bin:
- if (IsVS2017OrNewer) {
- bool HostIsX64 =
+ if (VSLayout == ToolsetLayout::VS2017OrNewer) {
+ const bool HostIsX64 =
llvm::Triple(llvm::sys::getProcessTriple()).isArch64Bit();
- llvm::sys::path::append(Path, "bin", (HostIsX64 ? "HostX64" : "HostX86"),
- llvmArchToWindowsSDKArch(TargetArch));
-
- } else {
- llvm::sys::path::append(Path, "bin", llvmArchToLegacyVCArch(TargetArch));
+ const char *const HostName = HostIsX64 ? "HostX64" : "HostX86";
+ llvm::sys::path::append(Path, "bin", HostName, SubdirName);
+ } else { // OlderVS or DevDivInternal
+ llvm::sys::path::append(Path, "bin", SubdirName);
}
break;
case SubDirectoryType::Include:
- llvm::sys::path::append(Path, "include");
+ llvm::sys::path::append(Path, IncludeName);
break;
case SubDirectoryType::Lib:
- llvm::sys::path::append(
- Path, "lib", IsVS2017OrNewer ? llvmArchToWindowsSDKArch(TargetArch)
- : llvmArchToLegacyVCArch(TargetArch));
+ llvm::sys::path::append(Path, "lib", SubdirName);
break;
}
return Path.str();
}
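// A quick sketch of the paths this yields, assuming a hypothetical
// VCToolChainPath of "VC", an x64 host, and an x86 target (illustrative
// values, not taken from a real install):
//   VS2017OrNewer:  Bin -> VC/bin/HostX64/x86, Include -> VC/include,
//                   Lib -> VC/lib/x86
//   OlderVS:        Bin -> VC/bin, Include -> VC/include, Lib -> VC/lib
//                   (the empty legacy x86 subdir name appends nothing)
//   DevDivInternal: Bin -> VC/bin/i386, Include -> VC/inc, Lib -> VC/lib/i386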
#ifdef USE_WIN32
static bool readFullStringValue(HKEY hkey, const char *valueName,
std::string &value) {
std::wstring WideValueName;
if (!llvm::ConvertUTF8toWide(valueName, WideValueName))
return false;
DWORD result = 0;
DWORD valueSize = 0;
DWORD type = 0;
// First just query for the required size.
result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, &type, NULL,
&valueSize);
if (result != ERROR_SUCCESS || type != REG_SZ || !valueSize)
return false;
std::vector<BYTE> buffer(valueSize);
result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, NULL, &buffer[0],
&valueSize);
if (result == ERROR_SUCCESS) {
std::wstring WideValue(reinterpret_cast<const wchar_t *>(buffer.data()),
valueSize / sizeof(wchar_t));
if (valueSize && WideValue.back() == L'\0') {
WideValue.pop_back();
}
// The destination buffer must be empty as an invariant of the conversion
// function; however, this function is sometimes called in a loop that passes
// in the same buffer. Simply clear it out so we can overwrite it.
value.clear();
return llvm::convertWideToUTF8(WideValue, value);
}
return false;
}
#endif
/// \brief Read registry string.
/// This also supports a means to look for high-versioned keys by use
/// of a $VERSION placeholder in the key path.
/// $VERSION in the key path is a placeholder for the version number,
/// causing the highest value path to be searched for and used.
/// I.e. "SOFTWARE\\Microsoft\\VisualStudio\\$VERSION".
/// There can be additional characters in the component. Only the numeric
/// characters are compared. This function only searches HKLM.
static bool getSystemRegistryString(const char *keyPath, const char *valueName,
std::string &value, std::string *phValue) {
#ifndef USE_WIN32
return false;
#else
HKEY hRootKey = HKEY_LOCAL_MACHINE;
HKEY hKey = NULL;
long lResult;
bool returnValue = false;
const char *placeHolder = strstr(keyPath, "$VERSION");
std::string bestName;
// If we have a $VERSION placeholder, do the highest-version search.
if (placeHolder) {
const char *keyEnd = placeHolder - 1;
const char *nextKey = placeHolder;
// Find end of previous key.
while ((keyEnd > keyPath) && (*keyEnd != '\\'))
keyEnd--;
// Find end of key containing $VERSION.
while (*nextKey && (*nextKey != '\\'))
nextKey++;
size_t partialKeyLength = keyEnd - keyPath;
char partialKey[256];
if (partialKeyLength >= sizeof(partialKey))
partialKeyLength = sizeof(partialKey) - 1;
strncpy(partialKey, keyPath, partialKeyLength);
partialKey[partialKeyLength] = '\0';
HKEY hTopKey = NULL;
lResult = RegOpenKeyExA(hRootKey, partialKey, 0, KEY_READ | KEY_WOW64_32KEY,
&hTopKey);
if (lResult == ERROR_SUCCESS) {
char keyName[256];
double bestValue = 0.0;
DWORD index, size = sizeof(keyName) - 1;
for (index = 0; RegEnumKeyExA(hTopKey, index, keyName, &size, NULL, NULL,
NULL, NULL) == ERROR_SUCCESS;
index++) {
const char *sp = keyName;
while (*sp && !isDigit(*sp))
sp++;
if (!*sp)
continue;
const char *ep = sp + 1;
while (*ep && (isDigit(*ep) || (*ep == '.')))
ep++;
char numBuf[32];
strncpy(numBuf, sp, sizeof(numBuf) - 1);
numBuf[sizeof(numBuf) - 1] = '\0';
double dvalue = strtod(numBuf, NULL);
if (dvalue > bestValue) {
// Test that InstallDir is indeed there before keeping this index.
// Open the chosen key path remainder.
bestName = keyName;
// Append rest of key.
bestName.append(nextKey);
lResult = RegOpenKeyExA(hTopKey, bestName.c_str(), 0,
KEY_READ | KEY_WOW64_32KEY, &hKey);
if (lResult == ERROR_SUCCESS) {
if (readFullStringValue(hKey, valueName, value)) {
bestValue = dvalue;
if (phValue)
*phValue = bestName;
returnValue = true;
}
RegCloseKey(hKey);
}
}
size = sizeof(keyName) - 1;
}
RegCloseKey(hTopKey);
}
} else {
lResult =
RegOpenKeyExA(hRootKey, keyPath, 0, KEY_READ | KEY_WOW64_32KEY, &hKey);
if (lResult == ERROR_SUCCESS) {
if (readFullStringValue(hKey, valueName, value))
returnValue = true;
if (phValue)
phValue->clear();
RegCloseKey(hKey);
}
}
return returnValue;
#endif // USE_WIN32
}
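// A minimal usage sketch: the key path below is the one cited in the doc
// comment above; the value name "InstallDir" is an assumption for
// illustration only.
//   std::string VSInstallDir, VersionKey;
//   if (getSystemRegistryString("SOFTWARE\\Microsoft\\VisualStudio\\$VERSION",
//                               "InstallDir", VSInstallDir, &VersionKey)) {
//     // VersionKey now names the highest version subkey found, e.g. "14.0".
//   }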
// Find the most recent version of Universal CRT or Windows 10 SDK.
// vcvarsqueryregistry.bat from Visual Studio 2015 sorts entries in the include
// directory by name and uses the last one in the list.
// So we compare entry names lexicographically to find the greatest one.
static bool getWindows10SDKVersionFromPath(const std::string &SDKPath,
std::string &SDKVersion) {
SDKVersion.clear();
std::error_code EC;
llvm::SmallString<128> IncludePath(SDKPath);
llvm::sys::path::append(IncludePath, "Include");
for (llvm::sys::fs::directory_iterator DirIt(IncludePath, EC), DirEnd;
DirIt != DirEnd && !EC; DirIt.increment(EC)) {
if (!llvm::sys::fs::is_directory(DirIt->path()))
continue;
StringRef CandidateName = llvm::sys::path::filename(DirIt->path());
// If WDK is installed, there could be subfolders like "wdf" in the
// "Include" directory.
// Allow only directories whose names start with "10.".
if (!CandidateName.startswith("10."))
continue;
if (CandidateName > SDKVersion)
SDKVersion = CandidateName;
}
return !SDKVersion.empty();
}
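// Worked example with a hypothetical Include directory holding
// {10.0.10240.0, 10.0.14393.0, wdf}: "wdf" is skipped by the "10." filter
// and "10.0.14393.0" wins the StringRef comparison. Note the comparison is
// lexicographic, mirroring vcvarsqueryregistry.bat, not numeric.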
/// \brief Get Windows SDK installation directory.
static bool getWindowsSDKDir(std::string &Path, int &Major,
std::string &WindowsSDKIncludeVersion,
std::string &WindowsSDKLibVersion) {
std::string RegistrySDKVersion;
// Try the Windows registry.
if (!getSystemRegistryString(
"SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\$VERSION",
"InstallationFolder", Path, &RegistrySDKVersion))
return false;
if (Path.empty() || RegistrySDKVersion.empty())
return false;
WindowsSDKIncludeVersion.clear();
WindowsSDKLibVersion.clear();
Major = 0;
std::sscanf(RegistrySDKVersion.c_str(), "v%d.", &Major);
if (Major <= 7)
return true;
if (Major == 8) {
// Windows SDK 8.x installs libraries in folders whose names depend on the
// version of the OS you're targeting. By default choose the newest, which
// usually corresponds to the version of the OS you've installed the SDK on.
const char *Tests[] = {"winv6.3", "win8", "win7"};
for (const char *Test : Tests) {
llvm::SmallString<128> TestPath(Path);
llvm::sys::path::append(TestPath, "Lib", Test);
if (llvm::sys::fs::exists(TestPath.c_str())) {
WindowsSDKLibVersion = Test;
break;
}
}
return !WindowsSDKLibVersion.empty();
}
if (Major == 10) {
if (!getWindows10SDKVersionFromPath(Path, WindowsSDKIncludeVersion))
return false;
WindowsSDKLibVersion = WindowsSDKIncludeVersion;
return true;
}
// Unsupported SDK version
return false;
}
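// For instance (illustrative values): a registry version string of "v8.1"
// parses to Major == 8 and takes the SDK 8.x lib-folder probing above,
// while "v10.0" parses to Major == 10 and defers to
// getWindows10SDKVersionFromPath for the full version string.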
// Gets the library path required to link against the Windows SDK.
bool MSVCToolChain::getWindowsSDKLibraryPath(std::string &path) const {
std::string sdkPath;
int sdkMajor = 0;
std::string windowsSDKIncludeVersion;
std::string windowsSDKLibVersion;
path.clear();
if (!getWindowsSDKDir(sdkPath, sdkMajor, windowsSDKIncludeVersion,
windowsSDKLibVersion))
return false;
llvm::SmallString<128> libPath(sdkPath);
llvm::sys::path::append(libPath, "Lib");
if (sdkMajor >= 8) {
llvm::sys::path::append(libPath, windowsSDKLibVersion, "um",
llvmArchToWindowsSDKArch(getArch()));
} else {
switch (getArch()) {
// In Windows SDK 7.x, x86 libraries are directly in the Lib folder.
case llvm::Triple::x86:
break;
case llvm::Triple::x86_64:
llvm::sys::path::append(libPath, "x64");
break;
case llvm::Triple::arm:
// It is not necessary to link against Windows SDK 7.x when targeting ARM.
return false;
default:
return false;
}
}
path = libPath.str();
return true;
}
// Check if the Include path of a specified version of Visual Studio contains
// specific header files. If not, they are probably shipped with Universal CRT.
bool MSVCToolChain::useUniversalCRT() const {
llvm::SmallString<128> TestPath(
getSubDirectoryPath(SubDirectoryType::Include));
llvm::sys::path::append(TestPath, "stdlib.h");
return !llvm::sys::fs::exists(TestPath);
}
static bool getUniversalCRTSdkDir(std::string &Path, std::string &UCRTVersion) {
// vcvarsqueryregistry.bat for Visual Studio 2015 queries the registry
// for the specific key "KitsRoot10". So do we.
if (!getSystemRegistryString(
"SOFTWARE\\Microsoft\\Windows Kits\\Installed Roots", "KitsRoot10",
Path, nullptr))
return false;
return getWindows10SDKVersionFromPath(Path, UCRTVersion);
}
bool MSVCToolChain::getUniversalCRTLibraryPath(std::string &Path) const {
std::string UniversalCRTSdkPath;
std::string UCRTVersion;
Path.clear();
if (!getUniversalCRTSdkDir(UniversalCRTSdkPath, UCRTVersion))
return false;
StringRef ArchName = llvmArchToWindowsSDKArch(getArch());
if (ArchName.empty())
return false;
llvm::SmallString<128> LibPath(UniversalCRTSdkPath);
llvm::sys::path::append(LibPath, "Lib", UCRTVersion, "ucrt", ArchName);
Path = LibPath.str();
return true;
}
static VersionTuple getMSVCVersionFromTriple(const llvm::Triple &Triple) {
unsigned Major, Minor, Micro;
Triple.getEnvironmentVersion(Major, Minor, Micro);
if (Major || Minor || Micro)
return VersionTuple(Major, Minor, Micro);
return VersionTuple();
}
static VersionTuple getMSVCVersionFromExe(const std::string &BinDir) {
VersionTuple Version;
#ifdef USE_WIN32
SmallString<128> ClExe(BinDir);
llvm::sys::path::append(ClExe, "cl.exe");
std::wstring ClExeWide;
if (!llvm::ConvertUTF8toWide(ClExe.c_str(), ClExeWide))
return Version;
const DWORD VersionSize = ::GetFileVersionInfoSizeW(ClExeWide.c_str(),
nullptr);
if (VersionSize == 0)
return Version;
SmallVector<uint8_t, 4 * 1024> VersionBlock(VersionSize);
if (!::GetFileVersionInfoW(ClExeWide.c_str(), 0, VersionSize,
VersionBlock.data()))
return Version;
VS_FIXEDFILEINFO *FileInfo = nullptr;
UINT FileInfoSize = 0;
if (!::VerQueryValueW(VersionBlock.data(), L"\\",
reinterpret_cast<LPVOID *>(&FileInfo), &FileInfoSize) ||
FileInfoSize < sizeof(*FileInfo))
return Version;
const unsigned Major = (FileInfo->dwFileVersionMS >> 16) & 0xFFFF;
const unsigned Minor = (FileInfo->dwFileVersionMS ) & 0xFFFF;
const unsigned Micro = (FileInfo->dwFileVersionLS >> 16) & 0xFFFF;
Version = VersionTuple(Major, Minor, Micro);
#endif
return Version;
}
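// The two DWORDs pack the four 16-bit version fields. For a hypothetical
// cl.exe reporting 19.11.25547:
//   dwFileVersionMS == 0x0013000B -> Major 19 (0x13), Minor 11 (0xB)
//   dwFileVersionLS == 0x63CB0000 -> Micro 25547 (0x63CB)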
void MSVCToolChain::AddSystemIncludeWithSubfolder(
const ArgList &DriverArgs, ArgStringList &CC1Args,
const std::string &folder, const Twine &subfolder1, const Twine &subfolder2,
const Twine &subfolder3) const {
llvm::SmallString<128> path(folder);
llvm::sys::path::append(path, subfolder1, subfolder2, subfolder3);
addSystemInclude(DriverArgs, CC1Args, path);
}
void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdinc))
return;
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, getDriver().ResourceDir,
"include");
}
// Add %INCLUDE%-like directories from the -imsvc flag.
for (const auto &Path : DriverArgs.getAllArgValues(options::OPT__SLASH_imsvc))
addSystemInclude(DriverArgs, CC1Args, Path);
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
// Honor %INCLUDE%. It should contain the essential search paths set up by
// vcvarsall.bat.
if (llvm::Optional<std::string> cl_include_dir =
llvm::sys::Process::GetEnv("INCLUDE")) {
SmallVector<StringRef, 8> Dirs;
StringRef(*cl_include_dir)
.split(Dirs, ";", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (StringRef Dir : Dirs)
addSystemInclude(DriverArgs, CC1Args, Dir);
if (!Dirs.empty())
return;
}
// When built with access to the proper Windows APIs, try to actually find
// the correct include paths first.
if (!VCToolChainPath.empty()) {
addSystemInclude(DriverArgs, CC1Args,
getSubDirectoryPath(SubDirectoryType::Include));
if (useUniversalCRT()) {
std::string UniversalCRTSdkPath;
std::string UCRTVersion;
if (getUniversalCRTSdkDir(UniversalCRTSdkPath, UCRTVersion)) {
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, UniversalCRTSdkPath,
"Include", UCRTVersion, "ucrt");
}
}
std::string WindowsSDKDir;
int major;
std::string windowsSDKIncludeVersion;
std::string windowsSDKLibVersion;
if (getWindowsSDKDir(WindowsSDKDir, major, windowsSDKIncludeVersion,
windowsSDKLibVersion)) {
if (major >= 8) {
// Note: windowsSDKIncludeVersion is empty for SDKs prior to v10;
// llvm::sys::path::append handles the empty component gracefully.
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include", windowsSDKIncludeVersion,
"shared");
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include", windowsSDKIncludeVersion,
"um");
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include", windowsSDKIncludeVersion,
"winrt");
} else {
AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, WindowsSDKDir,
"include");
}
}
return;
}
#if defined(LLVM_ON_WIN32)
// As a fallback, select default install paths.
// FIXME: Don't guess drives and paths like this on Windows.
const StringRef Paths[] = {
"C:/Program Files/Microsoft Visual Studio 10.0/VC/include",
"C:/Program Files/Microsoft Visual Studio 9.0/VC/include",
"C:/Program Files/Microsoft Visual Studio 9.0/VC/PlatformSDK/Include",
"C:/Program Files/Microsoft Visual Studio 8/VC/include",
"C:/Program Files/Microsoft Visual Studio 8/VC/PlatformSDK/Include"
};
addSystemIncludes(DriverArgs, CC1Args, Paths);
#endif
}
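// For example (illustrative invocation): "clang-cl /imsvc C:\ext foo.c"
// with %INCLUDE% unset and a VS2017+ install detected would search, in
// order: <resource-dir>/include, C:\ext, the VC include directory, the
// UCRT headers, then the SDK shared/um/winrt headers. A populated
// %INCLUDE% short-circuits everything after the /imsvc paths.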
void MSVCToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// FIXME: There should probably be logic here to find libc++ on Windows.
}
VersionTuple MSVCToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
bool IsWindowsMSVC = getTriple().isWindowsMSVCEnvironment();
VersionTuple MSVT = ToolChain::computeMSVCVersion(D, Args);
if (MSVT.empty())
MSVT = getMSVCVersionFromTriple(getTriple());
if (MSVT.empty() && IsWindowsMSVC)
MSVT = getMSVCVersionFromExe(getSubDirectoryPath(SubDirectoryType::Bin));
if (MSVT.empty() &&
Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
IsWindowsMSVC)) {
// -fms-compatibility-version=18.00 is default.
// FIXME: Consider bumping this to 19 (MSVC2015) soon.
MSVT = VersionTuple(18);
}
return MSVT;
}
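// Illustrative precedence: an explicit compatibility-version flag (handled
// by the base ToolChain::computeMSVCVersion) wins; then a versioned triple
// such as "x86_64-pc-windows-msvc19.11.0"; then the version stamped into
// cl.exe in the detected bin directory; finally, with -fms-extensions in
// effect, the 18.00 default.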
std::string
MSVCToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
types::ID InputType) const {
// The MSVC version doesn't care about the architecture, even though it
// may look at the triple internally.
VersionTuple MSVT = computeMSVCVersion(/*D=*/nullptr, Args);
MSVT = VersionTuple(MSVT.getMajor(), MSVT.getMinor().getValueOr(0),
MSVT.getSubminor().getValueOr(0));
// For the rest of the triple, however, a computed architecture name may
// be needed.
llvm::Triple Triple(ToolChain::ComputeEffectiveClangTriple(Args, InputType));
if (Triple.getEnvironment() == llvm::Triple::MSVC) {
StringRef ObjFmt = Triple.getEnvironmentName().split('-').second;
if (ObjFmt.empty())
Triple.setEnvironmentName((Twine("msvc") + MSVT.getAsString()).str());
else
Triple.setEnvironmentName(
(Twine("msvc") + MSVT.getAsString() + Twine('-') + ObjFmt).str());
}
return Triple.getTriple();
}
SanitizerMask MSVCToolChain::getSupportedSanitizers() const {
SanitizerMask Res = ToolChain::getSupportedSanitizers();
Res |= SanitizerKind::Address;
return Res;
}
static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL,
bool SupportsForcingFramePointer,
const char *ExpandChar, const OptTable &Opts) {
assert(A->getOption().matches(options::OPT__SLASH_O));
StringRef OptStr = A->getValue();
for (size_t I = 0, E = OptStr.size(); I != E; ++I) {
const char &OptChar = *(OptStr.data() + I);
switch (OptChar) {
default:
break;
case '1':
case '2':
case 'x':
case 'd':
if (&OptChar == ExpandChar) {
if (OptChar == 'd') {
DAL.AddFlagArg(A, Opts.getOption(options::OPT_O0));
} else {
if (OptChar == '1') {
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s");
} else if (OptChar == '2' || OptChar == 'x') {
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin));
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2");
}
if (SupportsForcingFramePointer &&
!DAL.hasArgNoClaim(options::OPT_fno_omit_frame_pointer))
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_fomit_frame_pointer));
if (OptChar == '1' || OptChar == '2')
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_ffunction_sections));
}
}
break;
case 'b':
if (I + 1 != E && isdigit(OptStr[I + 1])) {
switch (OptStr[I + 1]) {
case '0':
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_inline));
break;
case '1':
DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_hint_functions));
break;
case '2':
DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_functions));
break;
}
++I;
}
break;
case 'g':
break;
case 'i':
if (I + 1 != E && OptStr[I + 1] == '-') {
++I;
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_builtin));
} else {
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin));
}
break;
case 's':
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s");
break;
case 't':
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2");
break;
case 'y': {
bool OmitFramePointer = true;
if (I + 1 != E && OptStr[I + 1] == '-') {
OmitFramePointer = false;
++I;
}
if (SupportsForcingFramePointer) {
if (OmitFramePointer)
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_fomit_frame_pointer));
else
DAL.AddFlagArg(
A, Opts.getOption(options::OPT_fno_omit_frame_pointer));
} else {
// Don't warn about /Oy- in 64-bit builds (where
// SupportsForcingFramePointer is false). The flag having no effect
// there is a compiler-internal optimization, and people shouldn't have
// to special-case their build files for 64-bit clang-cl.
A->claim();
}
break;
}
}
}
}
static void TranslateDArg(Arg *A, llvm::opt::DerivedArgList &DAL,
const OptTable &Opts) {
assert(A->getOption().matches(options::OPT_D));
StringRef Val = A->getValue();
size_t Hash = Val.find('#');
if (Hash == StringRef::npos || Hash > Val.find('=')) {
DAL.append(A);
return;
}
std::string NewVal = Val;
NewVal[Hash] = '=';
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_D), NewVal);
}
llvm::opt::DerivedArgList *
MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch, Action::OffloadKind) const {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// /Oy and /Oy- only have an effect under X86-32.
bool SupportsForcingFramePointer = getArch() == llvm::Triple::x86;
// The -O[12xd] flag actually expands to several flags. We must desugar the
// flags so that the embedded options can be negated. For example, the '-O2'
// flag enables '-Oy'. Expanding '-O2' into its constituent flags allows us to
// correctly handle '-O2 -Oy-', where the trailing '-Oy-' disables a single
// aspect of '-O2'.
//
// Note that this expansion logic only applies to the *last* of '[12xd]'.
// First step is to search for the character we'd like to expand.
const char *ExpandChar = nullptr;
for (Arg *A : Args) {
if (!A->getOption().matches(options::OPT__SLASH_O))
continue;
StringRef OptStr = A->getValue();
for (size_t I = 0, E = OptStr.size(); I != E; ++I) {
char OptChar = OptStr[I];
char PrevChar = I > 0 ? OptStr[I - 1] : '0';
if (PrevChar == 'b') {
// OptChar does not expand; it's an argument to the previous char.
continue;
}
if (OptChar == '1' || OptChar == '2' || OptChar == 'x' || OptChar == 'd')
ExpandChar = OptStr.data() + I;
}
}
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT__SLASH_O)) {
// The -O flag actually takes an amalgam of other options. For example,
// '/Ogyb2' is equivalent to '/Og' '/Oy' '/Ob2'.
TranslateOptArg(A, *DAL, SupportsForcingFramePointer, ExpandChar, Opts);
} else if (A->getOption().matches(options::OPT_D)) {
// Translate -Dfoo#bar into -Dfoo=bar.
TranslateDArg(A, *DAL, Opts);
} else {
DAL->append(A);
}
}
return DAL;
}
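// A worked desugaring, under the expansion rule above (illustrative):
// on x86, "/O2" (with '2' the last of [12xd]) becomes
//   -fbuiltin -O2 -fomit-frame-pointer -ffunction-sections
// and a trailing "/Oy-" then appends -fno-omit-frame-pointer, which wins as
// the later flag. On x86-64, "/Oy-" is claimed silently instead, since
// forcing the frame pointer is unsupported there.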
Index: head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.h
===================================================================
--- head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.h (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Driver/ToolChains/MSVC.h (revision 322855)
@@ -1,141 +1,146 @@
//===--- MSVC.h - MSVC ToolChain Implementations ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
#include "Cuda.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
namespace tools {
/// Visual studio tools.
namespace visualstudio {
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
public:
Linker(const ToolChain &TC)
: Tool("visualstudio::Linker", "linker", TC, RF_Full,
llvm::sys::WEM_UTF16) {}
bool hasIntegratedCPP() const override { return false; }
bool isLinkJob() const override { return true; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
class LLVM_LIBRARY_VISIBILITY Compiler : public Tool {
public:
Compiler(const ToolChain &TC)
: Tool("visualstudio::Compiler", "compiler", TC, RF_Full,
llvm::sys::WEM_UTF16) {}
bool hasIntegratedAssembler() const override { return true; }
bool hasIntegratedCPP() const override { return true; }
bool isLinkJob() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
std::unique_ptr<Command> GetCommand(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const;
};
} // end namespace visualstudio
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain {
public:
MSVCToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
bool IsIntegratedAssemblerDefault() const override;
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefault() const override;
bool isPIEDefault() const override;
bool isPICDefaultForced() const override;
enum class SubDirectoryType {
Bin,
Include,
Lib,
};
std::string getSubDirectoryPath(SubDirectoryType Type,
llvm::Triple::ArchType TargetArch) const;
// Convenience overload.
// Uses the current target arch.
std::string getSubDirectoryPath(SubDirectoryType Type) const {
return getSubDirectoryPath(Type, getArch());
}
- bool getIsVS2017OrNewer() const { return IsVS2017OrNewer; }
+ enum class ToolsetLayout {
+ OlderVS,
+ VS2017OrNewer,
+ DevDivInternal,
+ };
+ bool getIsVS2017OrNewer() const { return VSLayout == ToolsetLayout::VS2017OrNewer; }
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddClangCXXStdlibIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
bool getWindowsSDKLibraryPath(std::string &path) const;
/// \brief Check if Universal CRT should be used if available
bool getUniversalCRTLibraryPath(std::string &path) const;
bool useUniversalCRT() const;
VersionTuple
computeMSVCVersion(const Driver *D,
const llvm::opt::ArgList &Args) const override;
std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
types::ID InputType) const override;
SanitizerMask getSupportedSanitizers() const override;
void printVerboseInfo(raw_ostream &OS) const override;
protected:
void AddSystemIncludeWithSubfolder(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
const std::string &folder,
const Twine &subfolder1,
const Twine &subfolder2 = "",
const Twine &subfolder3 = "") const;
Tool *buildLinker() const override;
Tool *buildAssembler() const override;
private:
std::string VCToolChainPath;
- bool IsVS2017OrNewer = false;
+ ToolsetLayout VSLayout = ToolsetLayout::OlderVS;
CudaInstallationDetector CudaInstallation;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
Index: head/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp (revision 322855)
@@ -1,701 +1,706 @@
//===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements WhitespaceManager class.
///
//===----------------------------------------------------------------------===//
#include "WhitespaceManager.h"
#include "llvm/ADT/STLExtras.h"
namespace clang {
namespace format {
bool WhitespaceManager::Change::IsBeforeInFile::
operator()(const Change &C1, const Change &C2) const {
return SourceMgr.isBeforeInTranslationUnit(
C1.OriginalWhitespaceRange.getBegin(),
C2.OriginalWhitespaceRange.getBegin());
}
WhitespaceManager::Change::Change(const FormatToken &Tok,
bool CreateReplacement,
SourceRange OriginalWhitespaceRange,
int Spaces, unsigned StartOfTokenColumn,
unsigned NewlinesBefore,
StringRef PreviousLinePostfix,
StringRef CurrentLinePrefix,
bool ContinuesPPDirective, bool IsInsideToken)
: Tok(&Tok), CreateReplacement(CreateReplacement),
OriginalWhitespaceRange(OriginalWhitespaceRange),
StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
PreviousLinePostfix(PreviousLinePostfix),
CurrentLinePrefix(CurrentLinePrefix),
ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces),
IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0),
PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
StartOfBlockComment(nullptr), IndentationOffset(0) {}
void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
unsigned Spaces,
unsigned StartOfTokenColumn,
bool InPPDirective) {
if (Tok.Finalized)
return;
Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
Spaces, StartOfTokenColumn, Newlines, "", "",
InPPDirective && !Tok.IsFirst,
/*IsInsideToken=*/false));
}
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
if (Tok.Finalized)
return;
Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
Tok.WhitespaceRange, /*Spaces=*/0,
Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
InPPDirective && !Tok.IsFirst,
/*IsInsideToken=*/false));
}
void WhitespaceManager::replaceWhitespaceInToken(
const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
unsigned Newlines, int Spaces) {
if (Tok.Finalized)
return;
SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
Changes.push_back(
Change(Tok, /*CreateReplacement=*/true,
SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces,
std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix,
InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true));
}
const tooling::Replacements &WhitespaceManager::generateReplacements() {
if (Changes.empty())
return Replaces;
std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
calculateLineBreakInformation();
alignConsecutiveDeclarations();
alignConsecutiveAssignments();
alignTrailingComments();
alignEscapedNewlines();
generateChanges();
return Replaces;
}
void WhitespaceManager::calculateLineBreakInformation() {
Changes[0].PreviousEndOfTokenColumn = 0;
Change *LastOutsideTokenChange = &Changes[0];
for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
SourceLocation OriginalWhitespaceStart =
Changes[i].OriginalWhitespaceRange.getBegin();
SourceLocation PreviousOriginalWhitespaceEnd =
Changes[i - 1].OriginalWhitespaceRange.getEnd();
unsigned OriginalWhitespaceStartOffset =
SourceMgr.getFileOffset(OriginalWhitespaceStart);
unsigned PreviousOriginalWhitespaceEndOffset =
SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd);
assert(PreviousOriginalWhitespaceEndOffset <=
OriginalWhitespaceStartOffset);
const char *const PreviousOriginalWhitespaceEndData =
SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd);
StringRef Text(PreviousOriginalWhitespaceEndData,
SourceMgr.getCharacterData(OriginalWhitespaceStart) -
PreviousOriginalWhitespaceEndData);
// Usually consecutive changes would occur in consecutive tokens. This is
// not the case, however, when analyzing some preprocessor runs of the
// annotated lines. For example, in this code:
//
// #if A // line 1
// int i = 1;
// #else B // line 2
// int i = 2;
// #endif // line 3
//
// one of the runs will produce the sequence of lines marked with line 1, 2
// and 3. So the two consecutive whitespace changes just before '// line 2'
// and before '#endif // line 3' span multiple lines and tokens:
//
// #else B{change X}[// line 2
// int i = 2;
// ]{change Y}#endif // line 3
//
// For this reason, if the text between consecutive changes spans multiple
// newlines, the token length must be adjusted to the end of the original
// line of the token.
auto NewlinePos = Text.find_first_of('\n');
if (NewlinePos == StringRef::npos) {
Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset -
PreviousOriginalWhitespaceEndOffset +
Changes[i].PreviousLinePostfix.size() +
Changes[i - 1].CurrentLinePrefix.size();
} else {
Changes[i - 1].TokenLength =
NewlinePos + Changes[i - 1].CurrentLinePrefix.size();
}
// If there are multiple changes in this token, sum up all the changes until
// the end of the line.
if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0)
LastOutsideTokenChange->TokenLength +=
Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
else
LastOutsideTokenChange = &Changes[i - 1];
Changes[i].PreviousEndOfTokenColumn =
Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
Changes[i - 1].IsTrailingComment =
(Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) ||
(Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) &&
Changes[i - 1].Tok->is(tok::comment) &&
// FIXME: This is a dirty hack. The problem is that
// BreakableLineCommentSection does comment reflow changes and here is
// the aligning of trailing comments. Consider the case where we reflow
// the second line up in this example:
//
// // line 1
// // line 2
//
// That amounts to 2 changes by BreakableLineCommentSection:
// - the first, delimited by (), for the whitespace between the tokens,
// - and second, delimited by [], for the whitespace at the beginning
// of the second token:
//
// // line 1(
// )[// ]line 2
//
// So in the end we have two changes like this:
//
// // line1()[ ]line 2
//
// Note that the OriginalWhitespaceStart of the second change is the
// same as the PreviousOriginalWhitespaceEnd of the first change.
// In this case, the below check ensures that the second change doesn't
// get treated as a trailing comment change here, since this might
// trigger additional whitespace to be wrongly inserted before "line 2"
// by the comment aligner here.
//
// For a proper solution we need a mechanism to say to WhitespaceManager
// that a particular change breaks the current sequence of trailing
// comments.
OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
}
// FIXME: The last token is currently not always an eof token; in those
// cases, setting TokenLength of the last token to 0 is wrong.
Changes.back().TokenLength = 0;
Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment);
const WhitespaceManager::Change *LastBlockComment = nullptr;
for (auto &Change : Changes) {
// Reset the IsTrailingComment flag for changes inside of trailing comments
// so they don't get realigned later. Comment line breaks, however, still
// need to be aligned.
if (Change.IsInsideToken && Change.NewlinesBefore == 0)
Change.IsTrailingComment = false;
Change.StartOfBlockComment = nullptr;
Change.IndentationOffset = 0;
if (Change.Tok->is(tok::comment)) {
if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken)
LastBlockComment = &Change;
else {
if ((Change.StartOfBlockComment = LastBlockComment))
Change.IndentationOffset =
Change.StartOfTokenColumn -
Change.StartOfBlockComment->StartOfTokenColumn;
}
} else {
LastBlockComment = nullptr;
}
}
}
// Align a single sequence of tokens, see AlignTokens below.
template <typename F>
static void
AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
SmallVector<WhitespaceManager::Change, 16> &Changes) {
bool FoundMatchOnLine = false;
int Shift = 0;
// ScopeStack keeps track of the current scope depth. It contains indices of
// the first token on each scope.
// We only run the "Matches" function on tokens from the outer-most scope.
// However, we do need to pay special attention to one class of tokens
// that are not in the outer-most scope, and that is function parameters
// which are split across multiple lines, as illustrated by this example:
// double a(int x);
// int b(int y,
// double z);
// In the above example, we need to take special care to ensure that
// 'double z' is indented along with its owning function 'b'.
SmallVector<unsigned, 16> ScopeStack;
for (unsigned i = Start; i != End; ++i) {
if (ScopeStack.size() != 0 &&
Changes[i].nestingAndIndentLevel() <
Changes[ScopeStack.back()].nestingAndIndentLevel())
ScopeStack.pop_back();
if (i != Start && Changes[i].nestingAndIndentLevel() >
Changes[i - 1].nestingAndIndentLevel())
ScopeStack.push_back(i);
bool InsideNestedScope = ScopeStack.size() != 0;
if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) {
Shift = 0;
FoundMatchOnLine = false;
}
// If this is the first matching token to be aligned, remember by how many
// spaces it has to be shifted, so the rest of the changes on the line are
// shifted by the same amount.
if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) {
FoundMatchOnLine = true;
Shift = Column - Changes[i].StartOfTokenColumn;
Changes[i].Spaces += Shift;
}
// This is for function parameters that are split across multiple lines,
// as mentioned in the ScopeStack comment.
if (InsideNestedScope && Changes[i].NewlinesBefore > 0) {
unsigned ScopeStart = ScopeStack.back();
if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) ||
(ScopeStart > Start + 1 &&
Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)))
Changes[i].Spaces += Shift;
}
assert(Shift >= 0);
Changes[i].StartOfTokenColumn += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
}
}
// Walk through a subset of the changes, starting at StartAt, and find
// sequences of matching tokens to align. To do so, keep track of the lines and
// whether or not a matching token was found on a line. If a matching token is
// found, extend the current sequence. If the current line cannot be part of a
// sequence, e.g. because there is an empty line before it or it contains only
// non-matching tokens, finalize the previous sequence.
// The value returned is the token on which we stopped, either because we
// exhausted all items inside Changes, or because we hit a scope level higher
// than our initial scope.
// This function is recursive. Each invocation processes only the scope level
// equal to the initial level, which is the level of Changes[StartAt].
// If we encounter a scope level greater than the initial level, then we call
// ourselves recursively, thereby avoiding the pollution of the current state
// with the alignment requirements of the nested sub-level. This recursive
// behavior is necessary for aligning function prototypes that have one or more
// arguments.
// If this function encounters a scope level less than the initial level,
// it returns the current position.
// There is a non-obvious subtlety in the recursive behavior: Even though we
// defer processing of nested levels to recursive invocations of this
// function, when it comes time to align a sequence of tokens, we run the
// alignment on the entire sequence, including the nested levels.
// When doing so, most of the nested tokens are skipped, because their
// alignment was already handled by the recursive invocations of this function.
// However, the special exception is that we do NOT skip function parameters
// that are split across multiple lines. See the test case in FormatTest.cpp
// that mentions "split function parameter alignment" for an example of this.
template <typename F>
static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
SmallVector<WhitespaceManager::Change, 16> &Changes,
unsigned StartAt) {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
// Line number of the start and the end of the current token sequence.
unsigned StartOfSequence = 0;
unsigned EndOfSequence = 0;
// Measure the scope level (i.e. depth of (), [], {}) of the first token, and
// abort when we hit any token in a higher scope than the starting one.
auto NestingAndIndentLevel = StartAt < Changes.size()
? Changes[StartAt].nestingAndIndentLevel()
: std::pair<unsigned, unsigned>(0, 0);
// Keep track of the number of commas before the matching tokens; we will only
// align a sequence of matching tokens if they are preceded by the same number
// of commas.
unsigned CommasBeforeLastMatch = 0;
unsigned CommasBeforeMatch = 0;
// Whether a matching token has been found on the current line.
bool FoundMatchOnLine = false;
// Aligns a sequence of matching tokens on the MinColumn column.
//
// Sequences start from the first matching token to align, and end at the
// first token of the first line that doesn't need to be aligned.
//
// We need to adjust the StartOfTokenColumn of each Change that is on a line
// containing any matching token to be aligned and located after such token.
auto AlignCurrentSequence = [&] {
if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
Changes);
MinColumn = 0;
MaxColumn = UINT_MAX;
StartOfSequence = 0;
EndOfSequence = 0;
};
unsigned i = StartAt;
for (unsigned e = Changes.size(); i != e; ++i) {
if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel)
break;
if (Changes[i].NewlinesBefore != 0) {
CommasBeforeMatch = 0;
EndOfSequence = i;
// If there is a blank line, or if the last line didn't contain any
// matching token, the sequence ends here.
if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
AlignCurrentSequence();
FoundMatchOnLine = false;
}
if (Changes[i].Tok->is(tok::comma)) {
++CommasBeforeMatch;
} else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) {
// Call AlignTokens recursively, skipping over this scope block.
unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
i = StoppedAt - 1;
continue;
}
if (!Matches(Changes[i]))
continue;
// If there is more than one matching token per line, or if the number of
// preceding commas does not match anymore, end the sequence.
if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch)
AlignCurrentSequence();
CommasBeforeLastMatch = CommasBeforeMatch;
FoundMatchOnLine = true;
if (StartOfSequence == 0)
StartOfSequence = i;
unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
int LineLengthAfter = -Changes[i].Spaces;
for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
// If we are restricted by the maximum column width, end the sequence.
if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
CommasBeforeLastMatch != CommasBeforeMatch) {
AlignCurrentSequence();
StartOfSequence = i;
}
MinColumn = std::max(MinColumn, ChangeMinColumn);
MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
}
EndOfSequence = i;
AlignCurrentSequence();
return i;
}
void WhitespaceManager::alignConsecutiveAssignments() {
if (!Style.AlignConsecutiveAssignments)
return;
AlignTokens(Style,
[&](const Change &C) {
// Do not align on equal signs that are first on a line.
if (C.NewlinesBefore > 0)
return false;
// Do not align on equal signs that are last on a line.
if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
return false;
return C.Tok->is(tok::equal);
},
Changes, /*StartAt=*/0);
}
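// Illustrative before/after for AlignConsecutiveAssignments:
//   int a = 1;              int a        = 1;
//   somelongname = 2;   ->  somelongname = 2;
//   double c = 3;           double c     = 3;
// The '=' tokens shift right to MinColumn, and everything after them on
// each line shifts by the same amount.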
void WhitespaceManager::alignConsecutiveDeclarations() {
if (!Style.AlignConsecutiveDeclarations)
return;
// FIXME: Currently we don't handle properly the PointerAlignment: Right
// The * and & are not aligned and are left dangling. Something has to be done
// about it, but it raises the question of alignment of code like:
// const char* const* v1;
// float const* v2;
// SomeVeryLongType const& v3;
AlignTokens(Style,
[](Change const &C) {
// tok::kw_operator is necessary for aligning operator overload
// definitions.
return C.Tok->is(TT_StartOfName) ||
C.Tok->is(TT_FunctionDeclarationName) ||
C.Tok->is(tok::kw_operator);
},
Changes, /*StartAt=*/0);
}
void WhitespaceManager::alignTrailingComments() {
unsigned MinColumn = 0;
unsigned MaxColumn = UINT_MAX;
unsigned StartOfSequence = 0;
bool BreakBeforeNext = false;
unsigned Newlines = 0;
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
if (Changes[i].StartOfBlockComment)
continue;
Newlines += Changes[i].NewlinesBefore;
if (!Changes[i].IsTrailingComment)
continue;
unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
- unsigned ChangeMaxColumn = Style.ColumnLimit >= Changes[i].TokenLength
- ? Style.ColumnLimit - Changes[i].TokenLength
- : ChangeMinColumn;
+ unsigned ChangeMaxColumn;
+
+ if (Style.ColumnLimit == 0)
+ ChangeMaxColumn = UINT_MAX;
+ else if (Style.ColumnLimit >= Changes[i].TokenLength)
+ ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
+ else
+ ChangeMaxColumn = ChangeMinColumn;
// If we don't create a replacement for this change, we have to consider
// it to be immovable.
if (!Changes[i].CreateReplacement)
ChangeMaxColumn = ChangeMinColumn;
if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
ChangeMaxColumn -= 2;
// If this comment follows a '}' in column 0, it probably documents the
// closing of a namespace and we don't want to align it.
bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
Changes[i - 1].Tok->is(tok::r_brace) &&
Changes[i - 1].StartOfTokenColumn == 0;
bool WasAlignedWithStartOfNextLine = false;
if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
Changes[i].OriginalWhitespaceRange.getEnd());
for (unsigned j = i + 1; j != e; ++j) {
if (Changes[j].Tok->is(tok::comment))
continue;
unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
Changes[j].OriginalWhitespaceRange.getEnd());
// The start of the next token was previously aligned with the
// start of this comment.
WasAlignedWithStartOfNextLine =
CommentColumn == NextColumn ||
CommentColumn == NextColumn + Style.IndentWidth;
break;
}
}
if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
alignTrailingComments(StartOfSequence, i, MinColumn);
MinColumn = ChangeMinColumn;
MaxColumn = ChangeMinColumn;
StartOfSequence = i;
} else if (BreakBeforeNext || Newlines > 1 ||
(ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
// Break the comment sequence if the previous line did not end
// in a trailing comment.
(Changes[i].NewlinesBefore == 1 && i > 0 &&
!Changes[i - 1].IsTrailingComment) ||
WasAlignedWithStartOfNextLine) {
alignTrailingComments(StartOfSequence, i, MinColumn);
MinColumn = ChangeMinColumn;
MaxColumn = ChangeMaxColumn;
StartOfSequence = i;
} else {
MinColumn = std::max(MinColumn, ChangeMinColumn);
MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
}
BreakBeforeNext =
(i == 0) || (Changes[i].NewlinesBefore > 1) ||
// Never start a sequence with a comment at the beginning of
// the line.
(Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
Newlines = 0;
}
alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
}
void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
unsigned Column) {
for (unsigned i = Start; i != End; ++i) {
int Shift = 0;
if (Changes[i].IsTrailingComment) {
Shift = Column - Changes[i].StartOfTokenColumn;
}
if (Changes[i].StartOfBlockComment) {
Shift = Changes[i].IndentationOffset +
Changes[i].StartOfBlockComment->StartOfTokenColumn -
Changes[i].StartOfTokenColumn;
}
assert(Shift >= 0);
Changes[i].Spaces += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
Changes[i].StartOfTokenColumn += Shift;
}
}
void WhitespaceManager::alignEscapedNewlines() {
if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign)
return;
bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left;
unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
unsigned StartOfMacro = 0;
for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
Change &C = Changes[i];
if (C.NewlinesBefore > 0) {
if (C.ContinuesPPDirective) {
MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
} else {
alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
StartOfMacro = i;
}
}
}
alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
}
void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
unsigned Column) {
for (unsigned i = Start; i < End; ++i) {
Change &C = Changes[i];
if (C.NewlinesBefore > 0) {
assert(C.ContinuesPPDirective);
if (C.PreviousEndOfTokenColumn + 1 > Column)
C.EscapedNewlineColumn = 0;
else
C.EscapedNewlineColumn = Column;
}
}
}
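// Illustrative effect: within a single macro, continuation backslashes are
// pulled to one common column -- just past the longest line for ENAS_Left,
// or out at the column limit for ENAS_Right. Sketch (ENAS_Left):
//   #define A    \           #define A  \
//     int aa; \        ->      int aa;  \
//     f(aa)                    f(aa)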
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
if (i > 0) {
assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
C.OriginalWhitespaceRange.getBegin() &&
"Generating two replacements for the same location");
}
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;
if (C.ContinuesPPDirective)
appendNewlineText(ReplacementText, C.NewlinesBefore,
C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
else
appendNewlineText(ReplacementText, C.NewlinesBefore);
appendIndentText(ReplacementText, C.Tok->IndentLevel,
std::max(0, C.Spaces),
C.StartOfTokenColumn - std::max(0, C.Spaces));
ReplacementText.append(C.CurrentLinePrefix);
storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
}
}
}
void WhitespaceManager::storeReplacement(SourceRange Range,
StringRef Text) {
unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
SourceMgr.getFileOffset(Range.getBegin());
// Don't create a replacement if it does not change anything.
if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
WhitespaceLength) == Text)
return;
auto Err = Replaces.add(tooling::Replacement(
SourceMgr, CharSourceRange::getCharRange(Range), Text));
// FIXME: better error handling. For now, just print an error message in the
// release version.
if (Err) {
llvm::errs() << llvm::toString(std::move(Err)) << "\n";
assert(false);
}
}
void WhitespaceManager::appendNewlineText(std::string &Text,
unsigned Newlines) {
for (unsigned i = 0; i < Newlines; ++i)
Text.append(UseCRLF ? "\r\n" : "\n");
}
void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
unsigned PreviousEndOfTokenColumn,
unsigned EscapedNewlineColumn) {
if (Newlines > 0) {
unsigned Offset =
std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn);
for (unsigned i = 0; i < Newlines; ++i) {
Text.append(EscapedNewlineColumn - Offset - 1, ' ');
Text.append(UseCRLF ? "\\\r\n" : "\\\n");
Offset = 0;
}
}
}
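// Worked numbers (illustrative): with EscapedNewlineColumn == 11 and
// PreviousEndOfTokenColumn == 6, Offset starts at min(9, 6) == 6, so the
// first continuation gets 11 - 6 - 1 == 4 spaces before its "\"; every
// later one uses Offset == 0 and gets the full 11 - 0 - 1 == 10 spaces.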
void WhitespaceManager::appendIndentText(std::string &Text,
unsigned IndentLevel, unsigned Spaces,
unsigned WhitespaceStartColumn) {
switch (Style.UseTab) {
case FormatStyle::UT_Never:
Text.append(Spaces, ' ');
break;
case FormatStyle::UT_Always: {
unsigned FirstTabWidth =
Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
// Indent with tabs only when there's at least one full tab.
if (FirstTabWidth + Style.TabWidth <= Spaces) {
Spaces -= FirstTabWidth;
Text.append("\t");
}
Text.append(Spaces / Style.TabWidth, '\t');
Text.append(Spaces % Style.TabWidth, ' ');
break;
}
case FormatStyle::UT_ForIndentation:
if (WhitespaceStartColumn == 0) {
unsigned Indentation = IndentLevel * Style.IndentWidth;
// This happens, e.g. when a line in a block comment is indented less than
// the first one.
if (Indentation > Spaces)
Indentation = Spaces;
unsigned Tabs = Indentation / Style.TabWidth;
Text.append(Tabs, '\t');
Spaces -= Tabs * Style.TabWidth;
}
Text.append(Spaces, ' ');
break;
case FormatStyle::UT_ForContinuationAndIndentation:
if (WhitespaceStartColumn == 0) {
unsigned Tabs = Spaces / Style.TabWidth;
Text.append(Tabs, '\t');
Spaces -= Tabs * Style.TabWidth;
}
Text.append(Spaces, ' ');
break;
}
}
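// Worked numbers for UT_Always (illustrative): with TabWidth == 8,
// WhitespaceStartColumn == 5, and Spaces == 14, FirstTabWidth == 8 - 5 % 8
// == 3. Since 3 + 8 <= 14, the first tab consumes 3 spaces, leaving 11,
// which emit 11 / 8 == 1 further tab and 11 % 8 == 3 trailing spaces.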
} // namespace format
} // namespace clang
Index: head/contrib/llvm/tools/clang/lib/Headers/unwind.h
===================================================================
--- head/contrib/llvm/tools/clang/lib/Headers/unwind.h (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Headers/unwind.h (revision 322855)
@@ -1,337 +1,299 @@
/*===---- unwind.h - Stack unwinding ----------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/
#ifndef __CLANG_UNWIND_H
#define __CLANG_UNWIND_H
#if defined(__APPLE__) && __has_include_next(<unwind.h>)
/* Darwin (from 11.x on) provides an unwind.h. If that's available,
* use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
* so define that around the include.*/
# ifndef _GNU_SOURCE
# define _SHOULD_UNDEFINE_GNU_SOURCE
# define _GNU_SOURCE
# endif
// libunwind's unwind.h reflects the current visibility. However, Mozilla
// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the
// visibility to default and export its contents. gcc also allows users to
// override its override by #defining HIDE_EXPORTS (but note, this only obeys
// the user's -fvisibility setting; it doesn't hide any exports on its own). We
// imitate gcc's header here:
# ifdef HIDE_EXPORTS
# include_next <unwind.h>
# else
# pragma GCC visibility push(default)
# include_next <unwind.h>
# pragma GCC visibility pop
# endif
# ifdef _SHOULD_UNDEFINE_GNU_SOURCE
# undef _GNU_SOURCE
# undef _SHOULD_UNDEFINE_GNU_SOURCE
# endif
#else
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* It is a bit strange for a header to play with the visibility of the
symbols it declares, but this matches gcc's behavior and some programs
depend on it. */
#ifndef HIDE_EXPORTS
#pragma GCC visibility push(default)
#endif
typedef uintptr_t _Unwind_Word;
typedef intptr_t _Unwind_Sword;
typedef uintptr_t _Unwind_Ptr;
typedef uintptr_t _Unwind_Internal_Ptr;
typedef uint64_t _Unwind_Exception_Class;
typedef intptr_t _sleb128_t;
typedef uintptr_t _uleb128_t;
struct _Unwind_Context;
-#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___))
-struct _Unwind_Control_Block;
-typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
-#else
struct _Unwind_Exception;
-typedef struct _Unwind_Exception _Unwind_Exception;
-#endif
typedef enum {
_URC_NO_REASON = 0,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
!defined(__ARM_DWARF_EH__)
_URC_OK = 0, /* used by ARM EHABI */
#endif
_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
_URC_FATAL_PHASE2_ERROR = 2,
_URC_FATAL_PHASE1_ERROR = 3,
_URC_NORMAL_STOP = 4,
_URC_END_OF_STACK = 5,
_URC_HANDLER_FOUND = 6,
_URC_INSTALL_CONTEXT = 7,
_URC_CONTINUE_UNWIND = 8,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
!defined(__ARM_DWARF_EH__)
_URC_FAILURE = 9 /* used by ARM EHABI */
#endif
} _Unwind_Reason_Code;
typedef enum {
_UA_SEARCH_PHASE = 1,
_UA_CLEANUP_PHASE = 2,
_UA_HANDLER_FRAME = 4,
_UA_FORCE_UNWIND = 8,
_UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */
} _Unwind_Action;
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
- _Unwind_Exception *);
+ struct _Unwind_Exception *);
-#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___))
-typedef struct _Unwind_Control_Block _Unwind_Control_Block;
-typedef uint32_t _Unwind_EHT_Header;
-
-struct _Unwind_Control_Block {
- uint64_t exception_class;
- void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
- /* unwinder cache (private fields for the unwinder's use) */
- struct {
- uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
- uint32_t reserved2; /* personality routine */
- uint32_t reserved3; /* callsite */
- uint32_t reserved4; /* forced unwind stop argument */
- uint32_t reserved5;
- } unwinder_cache;
- /* propagation barrier cache (valid after phase 1) */
- struct {
- uint32_t sp;
- uint32_t bitpattern[5];
- } barrier_cache;
- /* cleanup cache (preserved over cleanup) */
- struct {
- uint32_t bitpattern[4];
- } cleanup_cache;
- /* personality cache (for personality's benefit) */
- struct {
- uint32_t fnstart; /* function start address */
- _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
- uint32_t additional; /* additional data */
- uint32_t reserved1;
- } pr_cache;
- long long int : 0; /* force alignment of next item to 8-byte boundary */
-};
-#else
struct _Unwind_Exception {
_Unwind_Exception_Class exception_class;
_Unwind_Exception_Cleanup_Fn exception_cleanup;
_Unwind_Word private_1;
_Unwind_Word private_2;
/* The Itanium ABI requires that _Unwind_Exception objects are "double-word
* aligned". GCC has interpreted this to mean "use the maximum useful
* alignment for the target"; so do we. */
} __attribute__((__aligned__));
-#endif
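/* Illustrative sketch, not part of the original header: under the
   "maximum useful alignment" reading above, a check along the lines of
     _Static_assert(_Alignof(struct _Unwind_Exception) == __BIGGEST_ALIGNMENT__,
                    "Itanium ABI double-word alignment");
   would be expected to hold on targets that follow gcc's interpretation. */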
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
- _Unwind_Exception *,
+ struct _Unwind_Exception *,
struct _Unwind_Context *,
void *);
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,
- _Unwind_Exception_Class,
- _Unwind_Exception *,
- struct _Unwind_Context *);
+typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
+ int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
+ struct _Unwind_Context *);
typedef _Unwind_Personality_Fn __personality_routine;
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
void *);
-#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___))
+#if defined(__arm__) && !defined(__APPLE__)
+
typedef enum {
_UVRSC_CORE = 0, /* integer register */
_UVRSC_VFP = 1, /* vfp */
_UVRSC_WMMXD = 3, /* Intel WMMX data register */
_UVRSC_WMMXC = 4 /* Intel WMMX control register */
} _Unwind_VRS_RegClass;
typedef enum {
_UVRSD_UINT32 = 0,
_UVRSD_VFPX = 1,
_UVRSD_UINT64 = 3,
_UVRSD_FLOAT = 4,
_UVRSD_DOUBLE = 5
} _Unwind_VRS_DataRepresentation;
typedef enum {
_UVRSR_OK = 0,
_UVRSR_NOT_IMPLEMENTED = 1,
_UVRSR_FAILED = 2
} _Unwind_VRS_Result;
+#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
typedef uint32_t _Unwind_State;
#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
#define _US_ACTION_MASK ((_Unwind_State)3)
#define _US_FORCE_UNWIND ((_Unwind_State)8)
+#endif
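/* Usage sketch (an assumption, not part of this header): an EHABI
   personality routine conventionally dispatches on the low action bits and
   tests the force-unwind flag separately:
     switch (state & _US_ACTION_MASK) {
     case _US_VIRTUAL_UNWIND_FRAME:  break;  -- phase-1 search
     case _US_UNWIND_FRAME_STARTING: break;  -- phase-2 cleanup start
     case _US_UNWIND_FRAME_RESUME:   break;  -- phase-2 resume
     }
     if (state & _US_FORCE_UNWIND) { never claim a handler frame }
*/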
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
uint32_t __regno,
_Unwind_VRS_DataRepresentation __representation,
void *__valuep);
_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
uint32_t __regno,
_Unwind_VRS_DataRepresentation __representation,
void *__valuep);
static __inline__
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {
_Unwind_Word __value;
_Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
return __value;
}
static __inline__
void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,
_Unwind_Word __value) {
_Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
}
static __inline__
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {
_Unwind_Word __ip = _Unwind_GetGR(__context, 15);
return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */
}
static __inline__
void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {
_Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;
_Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);
}
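/* Example: on ARM, a Thumb-state return address such as 0x8001 refers to the
   instruction at 0x8000; _Unwind_GetIP above strips bit 0, and _Unwind_SetIP
   preserves it, so the Thumb/ARM state survives a get/set round trip. */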
#else
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);
void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);
void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);
#endif
_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);
_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);
void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
/* DWARF EH functions; currently not available on Darwin/ARM */
#if !defined(__APPLE__) || !defined(__arm__)
-_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,
- void *);
-void _Unwind_DeleteException(_Unwind_Exception *);
-void _Unwind_Resume(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
+ _Unwind_Stop_Fn, void *);
+void _Unwind_DeleteException(struct _Unwind_Exception *);
+void _Unwind_Resume(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
+
#endif
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
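/* Minimal usage sketch (an assumption, not part of this header): a trace
   callback that visits each frame's PC. Returning _URC_NO_REASON asks the
   unwinder to continue to the next frame; any other value conventionally
   stops the walk.
     static _Unwind_Reason_Code note_frame(struct _Unwind_Context *ctx,
                                           void *arg) {
       _Unwind_Word ip = _Unwind_GetIP(ctx);
       (void)arg; (void)ip;
       return _URC_NO_REASON;
     }
   invoked as _Unwind_Backtrace(note_frame, &some_buffer). */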
/* setjmp(3)/longjmp(3) stuff */
typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
-_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,
+_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Stop_Fn, void *);
-void _Unwind_SjLj_Resume(_Unwind_Exception *);
-_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);
+void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
+_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
void *_Unwind_FindEnclosingFunction(void *);
#ifdef __APPLE__
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)
__attribute__((__unavailable__));
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)
__attribute__((__unavailable__));
/* Darwin-specific functions */
void __register_frame(const void *);
void __deregister_frame(const void *);
struct dwarf_eh_bases {
uintptr_t tbase;
uintptr_t dbase;
uintptr_t func;
};
void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);
void __register_frame_info_bases(const void *, void *, void *, void *)
__attribute__((__unavailable__));
void __register_frame_info(const void *, void *) __attribute__((__unavailable__));
void __register_frame_info_table_bases(const void *, void*, void *, void *)
__attribute__((__unavailable__));
void __register_frame_info_table(const void *, void *)
__attribute__((__unavailable__));
void __register_frame_table(const void *) __attribute__((__unavailable__));
void __deregister_frame_info(const void *) __attribute__((__unavailable__));
void __deregister_frame_info_bases(const void *) __attribute__((__unavailable__));
#else
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);
#endif
#ifndef HIDE_EXPORTS
#pragma GCC visibility pop
#endif
#ifdef __cplusplus
}
#endif
#endif
#endif /* __CLANG_UNWIND_H */
Index: head/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp (revision 322855)
@@ -1,828 +1,839 @@
//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements pieces of the Preprocessor interface that manage the
// current lexer stack.
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PTHManager.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
using namespace clang;
PPCallbacks::~PPCallbacks() {}
//===----------------------------------------------------------------------===//
// Miscellaneous Methods.
//===----------------------------------------------------------------------===//
/// isInPrimaryFile - Return true if we're in the top-level file, not in a
/// \#include. This looks through macro expansions and active _Pragma lexers.
bool Preprocessor::isInPrimaryFile() const {
if (IsFileLexer())
return IncludeMacroStack.empty();
// If there are any stacked lexers, we're in a #include.
assert(IsFileLexer(IncludeMacroStack[0]) &&
"Top level include stack isn't our primary lexer?");
return std::none_of(IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
[this](const IncludeStackInfo &ISI) -> bool {
return IsFileLexer(ISI);
});
}
/// getCurrentFileLexer - Return the current file lexer being lexed from. Note
/// that this ignores any potentially active macro expansions and _Pragma
/// expansions going on at the time.
PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
if (IsFileLexer())
return CurPPLexer;
// Look for a stacked lexer.
for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
if (IsFileLexer(ISI))
return ISI.ThePPLexer;
}
return nullptr;
}
//===----------------------------------------------------------------------===//
// Methods for Entering and Callbacks for leaving various contexts
//===----------------------------------------------------------------------===//
/// EnterSourceFile - Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
SourceLocation Loc) {
assert(!CurTokenLexer && "Cannot #include a file inside a macro!");
++NumEnteredSourceFiles;
if (MaxIncludeStackDepth < IncludeMacroStack.size())
MaxIncludeStackDepth = IncludeMacroStack.size();
if (PTH) {
if (PTHLexer *PL = PTH->CreateLexer(FID)) {
EnterSourceFileWithPTH(PL, CurDir);
return false;
}
}
// Get the MemoryBuffer for this FID, if it fails, we fail.
bool Invalid = false;
const llvm::MemoryBuffer *InputFile =
getSourceManager().getBuffer(FID, Loc, &Invalid);
if (Invalid) {
SourceLocation FileStart = SourceMgr.getLocForStartOfFile(FID);
Diag(Loc, diag::err_pp_error_opening_file)
<< std::string(SourceMgr.getBufferName(FileStart)) << "";
return true;
}
if (isCodeCompletionEnabled() &&
SourceMgr.getFileEntryForID(FID) == CodeCompletionFile) {
CodeCompletionFileLoc = SourceMgr.getLocForStartOfFile(FID);
CodeCompletionLoc =
CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
}
EnterSourceFileWithLexer(new Lexer(FID, InputFile, *this), CurDir);
return false;
}
/// EnterSourceFileWithLexer - Add a source file to the top of the include stack
/// and start lexing tokens from it instead of the current buffer.
void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
const DirectoryLookup *CurDir) {
// Add the current lexer to the include stack.
if (CurPPLexer || CurTokenLexer)
PushIncludeMacroStack();
CurLexer.reset(TheLexer);
CurPPLexer = TheLexer;
CurDirLookup = CurDir;
CurLexerSubmodule = nullptr;
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_Lexer;
// Notify the client, if desired, that we are in a new source file.
if (Callbacks && !CurLexer->Is_PragmaLexer) {
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(CurLexer->getFileLoc());
Callbacks->FileChanged(CurLexer->getFileLoc(),
PPCallbacks::EnterFile, FileType);
}
}
/// EnterSourceFileWithPTH - Add a source file to the top of the include stack
/// and start getting tokens from it using the PTH cache.
void Preprocessor::EnterSourceFileWithPTH(PTHLexer *PL,
const DirectoryLookup *CurDir) {
if (CurPPLexer || CurTokenLexer)
PushIncludeMacroStack();
CurDirLookup = CurDir;
CurPTHLexer.reset(PL);
CurPPLexer = CurPTHLexer.get();
CurLexerSubmodule = nullptr;
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_PTHLexer;
// Notify the client, if desired, that we are in a new source file.
if (Callbacks) {
FileID FID = CurPPLexer->getFileID();
SourceLocation EnterLoc = SourceMgr.getLocForStartOfFile(FID);
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(EnterLoc);
Callbacks->FileChanged(EnterLoc, PPCallbacks::EnterFile, FileType);
}
}
/// EnterMacro - Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
MacroInfo *Macro, MacroArgs *Args) {
std::unique_ptr<TokenLexer> TokLexer;
if (NumCachedTokenLexers == 0) {
TokLexer = llvm::make_unique<TokenLexer>(Tok, ILEnd, Macro, Args, *this);
} else {
TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
TokLexer->Init(Tok, ILEnd, Macro, Args);
}
PushIncludeMacroStack();
CurDirLookup = nullptr;
CurTokenLexer = std::move(TokLexer);
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_TokenLexer;
}
/// EnterTokenStream - Add a "macro" context to the top of the include stack,
/// which will cause the lexer to start returning the specified tokens.
///
/// If DisableMacroExpansion is true, tokens lexed from the token stream will
/// not be subject to further macro expansion. Otherwise, these tokens will
/// be re-macro-expanded when/if expansion is enabled.
///
/// If OwnsTokens is false, this method assumes that the specified stream of
/// tokens has a permanent owner somewhere, so they do not need to be copied.
/// If it is true, it assumes the array of tokens is allocated with new[] and
/// must be freed.
///
void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
bool DisableMacroExpansion,
bool OwnsTokens) {
if (CurLexerKind == CLK_CachingLexer) {
if (CachedLexPos < CachedTokens.size()) {
// We're entering tokens into the middle of our cached token stream. We
// can't represent that, so just insert the tokens into the buffer.
CachedTokens.insert(CachedTokens.begin() + CachedLexPos,
Toks, Toks + NumToks);
if (OwnsTokens)
delete [] Toks;
return;
}
// New tokens are at the end of the cached token sequence; insert the
// token stream underneath the caching lexer.
ExitCachingLexMode();
EnterTokenStream(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
EnterCachingLexMode();
return;
}
// Create a macro expander to expand from the specified token stream.
std::unique_ptr<TokenLexer> TokLexer;
if (NumCachedTokenLexers == 0) {
TokLexer = llvm::make_unique<TokenLexer>(
Toks, NumToks, DisableMacroExpansion, OwnsTokens, *this);
} else {
TokLexer = std::move(TokenLexerCache[--NumCachedTokenLexers]);
TokLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
}
// Save our current state.
PushIncludeMacroStack();
CurDirLookup = nullptr;
CurTokenLexer = std::move(TokLexer);
if (CurLexerKind != CLK_LexAfterModuleImport)
CurLexerKind = CLK_TokenLexer;
}
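// Usage sketch (an assumption, not from this file): a caller handing the
// preprocessor a heap-allocated token array. OwnsTokens=true lets the
// TokenLexer delete[] the array once the stream is exhausted, while
// DisableMacroExpansion=true keeps the replayed tokens from being
// macro-expanded a second time:
//   Token *Toks = new Token[2];
//   Toks[0] = TokA; Toks[1] = TokB;   // TokA/TokB: hypothetical tokens
//   PP.EnterTokenStream(Toks, 2, /*DisableMacroExpansion=*/true,
//                       /*OwnsTokens=*/true);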
/// \brief Compute the relative path that names the given file relative to
/// the given directory.
static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir,
const FileEntry *File,
SmallString<128> &Result) {
Result.clear();
StringRef FilePath = File->getDir()->getName();
StringRef Path = FilePath;
while (!Path.empty()) {
if (const DirectoryEntry *CurDir = FM.getDirectory(Path)) {
if (CurDir == Dir) {
Result = FilePath.substr(Path.size());
llvm::sys::path::append(Result,
llvm::sys::path::filename(File->getName()));
return;
}
}
Path = llvm::sys::path::parent_path(Path);
}
Result = File->getName();
}
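// Worked example (sketch): with Dir = "/usr/include" and File =
// "/usr/include/sys/types.h", the loop starts from "/usr/include/sys",
// walks parent_path() until it equals Dir, keeps the suffix "/sys", and
// appends the filename, yielding "/sys/types.h". If no ancestor of the
// file's directory matches Dir, Result falls back to the file's full name.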
void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) {
if (CurTokenLexer) {
CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result);
return;
}
if (CurLexer) {
CurLexer->PropagateLineStartLeadingSpaceInfo(Result);
return;
}
// FIXME: Handle other kinds of lexers? It generally shouldn't matter,
// but it might if they're empty?
}
/// \brief Determine the location to use as the end of the buffer for a lexer.
///
/// If the file ends with a newline, form the EOF token on the newline itself,
/// rather than "on the line following it", which doesn't exist. This makes
/// diagnostics relating to the end of file include the last file that the user
/// actually typed, which is goodness.
const char *Preprocessor::getCurLexerEndPos() {
const char *EndPos = CurLexer->BufferEnd;
if (EndPos != CurLexer->BufferStart &&
(EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
--EndPos;
// Handle \n\r and \r\n:
if (EndPos != CurLexer->BufferStart &&
(EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
EndPos[-1] != EndPos[0])
--EndPos;
}
return EndPos;
}
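// Example: for a buffer ending in ";\r\n", the first step backs up over the
// '\n' and the second over the '\r' (the two characters differ, so they form
// a single line terminator), placing the EOF token on the last line the user
// actually typed.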
static void collectAllSubModulesWithUmbrellaHeader(
const Module &Mod, SmallVectorImpl<const Module *> &SubMods) {
if (Mod.getUmbrellaHeader())
SubMods.push_back(&Mod);
for (auto *M : Mod.submodules())
collectAllSubModulesWithUmbrellaHeader(*M, SubMods);
}
void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) {
assert(Mod.getUmbrellaHeader() && "Module must use umbrella header");
SourceLocation StartLoc =
SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
if (getDiagnostics().isIgnored(diag::warn_uncovered_module_header, StartLoc))
return;
ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
const DirectoryEntry *Dir = Mod.getUmbrellaDir().Entry;
vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
std::error_code EC;
for (vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC), End;
Entry != End && !EC; Entry.increment(EC)) {
using llvm::StringSwitch;
// Check whether this entry has an extension typically associated with
// headers.
if (!StringSwitch<bool>(llvm::sys::path::extension(Entry->getName()))
.Cases(".h", ".H", ".hh", ".hpp", true)
.Default(false))
continue;
if (const FileEntry *Header = getFileManager().getFile(Entry->getName()))
if (!getSourceManager().hasFileInfo(Header)) {
if (!ModMap.isHeaderInUnavailableModule(Header)) {
// Find the relative path that would access this header.
SmallString<128> RelativePath;
computeRelativePath(FileMgr, Dir, Header, RelativePath);
Diag(StartLoc, diag::warn_uncovered_module_header)
<< Mod.getFullModuleName() << RelativePath;
}
}
}
}
/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
/// the current file. This either returns the EOF token or pops a level off
/// the include stack and keeps going.
bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
assert(!CurTokenLexer &&
"Ending a file when currently in a macro!");
// If we have an unclosed module region from a pragma at the end of a
// module, complain and close it now.
// FIXME: This is not correct if we are building a module from PTH.
const bool LeavingSubmodule = CurLexer && CurLexerSubmodule;
if ((LeavingSubmodule || IncludeMacroStack.empty()) &&
!BuildingSubmoduleStack.empty() &&
BuildingSubmoduleStack.back().IsPragma) {
Diag(BuildingSubmoduleStack.back().ImportLoc,
diag::err_pp_module_begin_without_module_end);
Module *M = LeaveSubmodule(/*ForPragma*/true);
Result.startToken();
const char *EndPos = getCurLexerEndPos();
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
Result.setAnnotationEndLoc(Result.getLocation());
Result.setAnnotationValue(M);
return true;
}
// See if this file had a controlling macro.
if (CurPPLexer) { // Not ending a macro, ignore it.
if (const IdentifierInfo *ControllingMacro =
CurPPLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
// Okay, this has a controlling macro, remember in HeaderFileInfo.
if (const FileEntry *FE = CurPPLexer->getFileEntry()) {
HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
if (MacroInfo *MI =
getMacroInfo(const_cast<IdentifierInfo*>(ControllingMacro)))
MI->setUsedForHeaderGuard(true);
if (const IdentifierInfo *DefinedMacro =
CurPPLexer->MIOpt.GetDefinedMacro()) {
if (!isMacroDefined(ControllingMacro) &&
DefinedMacro != ControllingMacro &&
HeaderInfo.FirstTimeLexingFile(FE)) {
// If the edit distance between the two macros is more than 50%,
// DefinedMacro may not be a header guard, or it may be the header guard
// of another header file; it may therefore define something completely
// different. This can be observed in the wild when handling feature
// macros or header guards in different files.
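// Worked example (sketch): "FOO_H" vs. "FOO_h" gives MaxHalfLength = 2
// and edit distance 1, so the header-guard warning below fires, while
// "FOO_H" vs. "BAR_CONFIG" exceeds the 50% bound and is treated as an
// unrelated macro.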
const StringRef ControllingMacroName = ControllingMacro->getName();
const StringRef DefinedMacroName = DefinedMacro->getName();
const size_t MaxHalfLength = std::max(ControllingMacroName.size(),
DefinedMacroName.size()) / 2;
const unsigned ED = ControllingMacroName.edit_distance(
DefinedMacroName, true, MaxHalfLength);
if (ED <= MaxHalfLength) {
// Emit a warning for a bad header guard.
Diag(CurPPLexer->MIOpt.GetMacroLocation(),
diag::warn_header_guard)
<< CurPPLexer->MIOpt.GetMacroLocation() << ControllingMacro;
Diag(CurPPLexer->MIOpt.GetDefinedLocation(),
diag::note_header_guard)
<< CurPPLexer->MIOpt.GetDefinedLocation() << DefinedMacro
<< ControllingMacro
<< FixItHint::CreateReplacement(
CurPPLexer->MIOpt.GetDefinedLocation(),
ControllingMacro->getName());
}
}
}
}
}
}
// Complain about reaching a true EOF within arc_cf_code_audited.
// We don't want to complain about reaching the end of a macro
// instantiation or a _Pragma.
if (PragmaARCCFCodeAuditedLoc.isValid() &&
!isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
Diag(PragmaARCCFCodeAuditedLoc, diag::err_pp_eof_in_arc_cf_code_audited);
// Recover by leaving immediately.
PragmaARCCFCodeAuditedLoc = SourceLocation();
}
// Complain about reaching a true EOF within assume_nonnull.
// We don't want to complain about reaching the end of a macro
// instantiation or a _Pragma.
if (PragmaAssumeNonNullLoc.isValid() &&
!isEndOfMacro && !(CurLexer && CurLexer->Is_PragmaLexer)) {
Diag(PragmaAssumeNonNullLoc, diag::err_pp_eof_in_assume_nonnull);
// Recover by leaving immediately.
PragmaAssumeNonNullLoc = SourceLocation();
}
// If this is a #include'd file, pop it off the include stack and continue
// lexing the #includer file.
if (!IncludeMacroStack.empty()) {
// If we lexed the code-completion file, act as if we reached EOF.
if (isCodeCompletionEnabled() && CurPPLexer &&
SourceMgr.getLocForStartOfFile(CurPPLexer->getFileID()) ==
CodeCompletionFileLoc) {
if (CurLexer) {
Result.startToken();
CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
CurLexer.reset();
} else {
assert(CurPTHLexer && "Got EOF but no current lexer set!");
CurPTHLexer->getEOF(Result);
CurPTHLexer.reset();
}
CurPPLexer = nullptr;
return true;
}
if (!isEndOfMacro && CurPPLexer &&
SourceMgr.getIncludeLoc(CurPPLexer->getFileID()).isValid()) {
// Notify SourceManager to record the number of FileIDs that were created
// during lexing of the #include'd file.
unsigned NumFIDs =
SourceMgr.local_sloc_entry_size() -
CurPPLexer->getInitialNumSLocEntries() + 1/*#include'd file*/;
SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs);
}
+ bool ExitedFromPredefinesFile = false;
FileID ExitedFID;
- if (Callbacks && !isEndOfMacro && CurPPLexer)
+ if (!isEndOfMacro && CurPPLexer) {
ExitedFID = CurPPLexer->getFileID();
+ assert(PredefinesFileID.isValid() &&
+ "HandleEndOfFile is called before PredefinesFileId is set");
+ ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID);
+ }
+
if (LeavingSubmodule) {
// We're done with this submodule.
Module *M = LeaveSubmodule(/*ForPragma*/false);
// Notify the parser that we've left the module.
const char *EndPos = getCurLexerEndPos();
Result.startToken();
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
Result.setAnnotationEndLoc(Result.getLocation());
Result.setAnnotationValue(M);
}
// We're done with the #included file.
RemoveTopOfLexerStack();
// Propagate info about start-of-line/leading white-space/etc.
PropagateLineStartLeadingSpaceInfo(Result);
// Notify the client, if desired, that we are in a new source file.
if (Callbacks && !isEndOfMacro && CurPPLexer) {
SrcMgr::CharacteristicKind FileType =
SourceMgr.getFileCharacteristic(CurPPLexer->getSourceLocation());
Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
PPCallbacks::ExitFile, FileType, ExitedFID);
}
+
+ // Restore conditional stack from the preamble right after exiting from the
+ // predefines file.
+ if (ExitedFromPredefinesFile)
+ replayPreambleConditionalStack();
// Client should lex another token unless we generated an EOM.
return LeavingSubmodule;
}
// If this is the end of the main file, form an EOF token.
if (CurLexer) {
const char *EndPos = getCurLexerEndPos();
Result.startToken();
CurLexer->BufferPtr = EndPos;
CurLexer->FormTokenWithChars(Result, EndPos, tok::eof);
if (isCodeCompletionEnabled()) {
// Inserting the code-completion point increases the source buffer by 1,
// but the main FileID was created before inserting the point.
// Compensate by reducing the EOF location by 1, otherwise the location
// will point to the next FileID.
// FIXME: This is hacky, the code-completion point should probably be
// inserted before the main FileID is created.
if (CurLexer->getFileLoc() == CodeCompletionFileLoc)
Result.setLocation(Result.getLocation().getLocWithOffset(-1));
}
if (!isIncrementalProcessingEnabled())
// We're done with lexing.
CurLexer.reset();
} else {
assert(CurPTHLexer && "Got EOF but no current lexer set!");
CurPTHLexer->getEOF(Result);
CurPTHLexer.reset();
}
if (!isIncrementalProcessingEnabled())
CurPPLexer = nullptr;
if (TUKind == TU_Complete) {
// This is the end of the top-level file. 'WarnUnusedMacroLocs' has
// collected all macro locations that we need to warn because they are not
// used.
for (WarnUnusedMacroLocsTy::iterator
I=WarnUnusedMacroLocs.begin(), E=WarnUnusedMacroLocs.end();
I!=E; ++I)
Diag(*I, diag::pp_macro_not_used);
}
// If we are building a module that has an umbrella header, make sure that
// each of the headers within the umbrella directory, including those of
// all submodules, was actually included by the umbrella header.
if (Module *Mod = getCurrentModule()) {
llvm::SmallVector<const Module *, 4> AllMods;
collectAllSubModulesWithUmbrellaHeader(*Mod, AllMods);
for (auto *M : AllMods)
diagnoseMissingHeaderInUmbrellaDir(*M);
}
return true;
}
/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
/// hits the end of its token stream.
bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
assert(CurTokenLexer && !CurPPLexer &&
"Ending a macro when currently in a #include file!");
if (!MacroExpandingLexersStack.empty() &&
MacroExpandingLexersStack.back().first == CurTokenLexer.get())
removeCachedMacroExpandedTokensOfLastLexer();
// Delete or cache the now-dead macro expander.
if (NumCachedTokenLexers == TokenLexerCacheSize)
CurTokenLexer.reset();
else
TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
// Handle this like a #include file being popped off the stack.
return HandleEndOfFile(Result, true);
}
/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
/// lexer stack. This should only be used in situations where the current
/// state of the top-of-stack lexer is unknown.
void Preprocessor::RemoveTopOfLexerStack() {
assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
if (CurTokenLexer) {
// Delete or cache the now-dead macro expander.
if (NumCachedTokenLexers == TokenLexerCacheSize)
CurTokenLexer.reset();
else
TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
}
PopIncludeMacroStack();
}
/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
/// comment (/##/) in Microsoft mode, this method handles updating the current
/// state, returning the token on the next source line.
void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
assert(CurTokenLexer && !CurPPLexer &&
"Pasted comment can only be formed from macro");
// We handle this by scanning for the closest real lexer, switching it to
// raw mode and preprocessor mode. This will cause it to return \n as an
// explicit EOD token.
PreprocessorLexer *FoundLexer = nullptr;
bool LexerWasInPPMode = false;
for (const IncludeStackInfo &ISI : llvm::reverse(IncludeMacroStack)) {
if (ISI.ThePPLexer == nullptr) continue; // Scan for a real lexer.
// Once we find a real lexer, mark it as raw mode (disabling macro
// expansions) and preprocessor mode (return EOD). We know that the lexer
// was *not* in raw mode before, because the macro that the comment came
// from was expanded. However, it could have already been in preprocessor
// mode (#if COMMENT) in which case we have to return it to that mode and
// return EOD.
FoundLexer = ISI.ThePPLexer;
FoundLexer->LexingRawMode = true;
LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
FoundLexer->ParsingPreprocessorDirective = true;
break;
}
// Okay, we either found and switched over the lexer, or we didn't find a
// lexer. In either case, finish off the macro the comment came from, getting
// the next token.
if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
// Discarding comments as long as we don't have EOF or EOD. This 'comments
// out' the rest of the line, including any tokens that came from other macros
// that were active, as in:
// #define submacro a COMMENT b
// submacro c
// which should lex to 'a' only: 'b' and 'c' should be removed.
while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof))
Lex(Tok);
// If we got an eod token, then we successfully found the end of the line.
if (Tok.is(tok::eod)) {
assert(FoundLexer && "Can't get end of line without an active lexer");
// Restore the lexer back to normal mode instead of raw mode.
FoundLexer->LexingRawMode = false;
// If the lexer was already in preprocessor mode, just return the EOD token
// to finish the preprocessor line.
if (LexerWasInPPMode) return;
// Otherwise, switch out of PP mode and return the next lexed token.
FoundLexer->ParsingPreprocessorDirective = false;
return Lex(Tok);
}
// If we got an EOF token, then we reached the end of the token stream but
// didn't find an explicit \n. This can only happen if there was no lexer
// active (an active lexer would return EOD at EOF if there was no \n in
// preprocessor directive mode), so just return EOF as our token.
assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
}
void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc,
bool ForPragma) {
if (!getLangOpts().ModulesLocalVisibility) {
// Just track that we entered this submodule.
BuildingSubmoduleStack.push_back(
BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
PendingModuleMacroNames.size()));
return;
}
// Resolve as much of the module definition as we can now, before we enter
// one of its headers.
// FIXME: Can we enable Complain here?
// FIXME: Can we do this when local visibility is disabled?
ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
ModMap.resolveExports(M, /*Complain=*/false);
ModMap.resolveUses(M, /*Complain=*/false);
ModMap.resolveConflicts(M, /*Complain=*/false);
// If this is the first time we've entered this module, set up its state.
auto R = Submodules.insert(std::make_pair(M, SubmoduleState()));
auto &State = R.first->second;
bool FirstTime = R.second;
if (FirstTime) {
// Determine the set of starting macros for this submodule; take these
// from the "null" module (the predefines buffer).
//
// FIXME: If we have local visibility but not modules enabled, the
// NullSubmoduleState is polluted by #defines in the top-level source
// file.
auto &StartingMacros = NullSubmoduleState.Macros;
// Restore to the starting state.
// FIXME: Do this lazily, when each macro name is first referenced.
for (auto &Macro : StartingMacros) {
// Skip uninteresting macros.
if (!Macro.second.getLatest() &&
Macro.second.getOverriddenMacros().empty())
continue;
MacroState MS(Macro.second.getLatest());
MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
}
}
// Track that we entered this module.
BuildingSubmoduleStack.push_back(
BuildingSubmoduleInfo(M, ImportLoc, ForPragma, CurSubmoduleState,
PendingModuleMacroNames.size()));
// Switch to this submodule as the current submodule.
CurSubmoduleState = &State;
// This module is visible to itself.
if (FirstTime)
makeModuleVisible(M, ImportLoc);
}
bool Preprocessor::needModuleMacros() const {
// If we're not within a submodule, we never need to create ModuleMacros.
if (BuildingSubmoduleStack.empty())
return false;
// If we are tracking module macro visibility even for textually-included
// headers, we need ModuleMacros.
if (getLangOpts().ModulesLocalVisibility)
return true;
// Otherwise, we only need module macros if we're actually compiling a module
// interface.
return getLangOpts().isCompilingModule();
}
Module *Preprocessor::LeaveSubmodule(bool ForPragma) {
if (BuildingSubmoduleStack.empty() ||
BuildingSubmoduleStack.back().IsPragma != ForPragma) {
assert(ForPragma && "non-pragma module enter/leave mismatch");
return nullptr;
}
auto &Info = BuildingSubmoduleStack.back();
Module *LeavingMod = Info.M;
SourceLocation ImportLoc = Info.ImportLoc;
if (!needModuleMacros() ||
(!getLangOpts().ModulesLocalVisibility &&
LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
// If we don't need module macros, or this is not a module for which we
// are tracking macro visibility, don't build any, and preserve the list
// of pending names for the surrounding submodule.
BuildingSubmoduleStack.pop_back();
makeModuleVisible(LeavingMod, ImportLoc);
return LeavingMod;
}
// Create ModuleMacros for any macros defined in this submodule.
llvm::SmallPtrSet<const IdentifierInfo*, 8> VisitedMacros;
for (unsigned I = Info.OuterPendingModuleMacroNames;
I != PendingModuleMacroNames.size(); ++I) {
auto *II = const_cast<IdentifierInfo*>(PendingModuleMacroNames[I]);
if (!VisitedMacros.insert(II).second)
continue;
auto MacroIt = CurSubmoduleState->Macros.find(II);
if (MacroIt == CurSubmoduleState->Macros.end())
continue;
auto &Macro = MacroIt->second;
// Find the starting point for the MacroDirective chain in this submodule.
MacroDirective *OldMD = nullptr;
auto *OldState = Info.OuterSubmoduleState;
if (getLangOpts().ModulesLocalVisibility)
OldState = &NullSubmoduleState;
if (OldState && OldState != CurSubmoduleState) {
// FIXME: It'd be better to start at the state from when we most recently
// entered this submodule, but it doesn't really matter.
auto &OldMacros = OldState->Macros;
auto OldMacroIt = OldMacros.find(II);
if (OldMacroIt == OldMacros.end())
OldMD = nullptr;
else
OldMD = OldMacroIt->second.getLatest();
}
// This module may have exported a new macro. If so, create a ModuleMacro
// representing that fact.
bool ExplicitlyPublic = false;
for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
assert(MD && "broken macro directive chain");
if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
// The latest visibility directive for a name in a submodule affects
// all the directives that come before it.
if (VisMD->isPublic())
ExplicitlyPublic = true;
else if (!ExplicitlyPublic)
// Private with no following public directive: not exported.
break;
} else {
MacroInfo *Def = nullptr;
if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
Def = DefMD->getInfo();
// FIXME: Issue a warning if multiple headers for the same submodule
// define a macro, rather than silently ignoring all but the first.
bool IsNew;
// Don't bother creating a module macro if it would represent a #undef
// that doesn't override anything.
if (Def || !Macro.getOverriddenMacros().empty())
addModuleMacro(LeavingMod, II, Def,
Macro.getOverriddenMacros(), IsNew);
if (!getLangOpts().ModulesLocalVisibility) {
// This macro is exposed to the rest of this compilation as a
// ModuleMacro; we don't need to track its MacroDirective any more.
Macro.setLatest(nullptr);
Macro.setOverriddenMacros(*this, {});
}
break;
}
}
}
PendingModuleMacroNames.resize(Info.OuterPendingModuleMacroNames);
// FIXME: Before we leave this submodule, we should parse all the other
// headers within it. Otherwise, we're left with an inconsistent state
// where we've made the module visible but don't yet have its complete
// contents.
// Put back the outer module's state, if we're tracking it.
if (getLangOpts().ModulesLocalVisibility)
CurSubmoduleState = Info.OuterSubmoduleState;
BuildingSubmoduleStack.pop_back();
// A nested #include makes the included submodule visible.
makeModuleVisible(LeavingMod, ImportLoc);
return LeavingMod;
}
Index: head/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp (revision 322855)
@@ -1,957 +1,959 @@
//===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
// -H - Print the name of each header file used.
// -d[DNI] - Dump various things.
// -fworking-directory - #line's with preprocessor's working dir.
// -fpreprocessed
// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
// -W*
// -w
//
// Messages to emit:
// "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/PTHManager.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace clang;
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
//===----------------------------------------------------------------------===//
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
DiagnosticsEngine &diags, LangOptions &opts,
SourceManager &SM, MemoryBufferCache &PCMCache,
HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
IdentifierInfoLookup *IILookup, bool OwnsHeaders,
TranslationUnitKind TUKind)
: PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
PCMCache(PCMCache), ScratchBuf(new ScratchBuffer(SourceMgr)),
HeaderInfo(Headers), TheModuleLoader(TheModuleLoader),
ExternalSource(nullptr), Identifiers(opts, IILookup),
PragmaHandlers(new PragmaNamespace(StringRef())),
IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
CodeCompletionFile(nullptr), CodeCompletionOffset(0),
LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
CodeCompletionReached(false), CodeCompletionII(nullptr),
MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
CurDirLookup(nullptr), CurLexerKind(CLK_Lexer),
CurLexerSubmodule(nullptr), Callbacks(nullptr),
CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
Record(nullptr), MIChainHead(nullptr) {
OwnsHeaderSearch = OwnsHeaders;
CounterValue = 0; // __COUNTER__ starts at 0.
// Clear stats.
NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
NumIf = NumElse = NumEndif = 0;
NumEnteredSourceFiles = 0;
NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
MaxIncludeStackDepth = 0;
NumSkipped = 0;
// Default to discarding comments.
KeepComments = false;
KeepMacroComments = false;
SuppressIncludeNotFoundError = false;
// Macro expansion is enabled.
DisableMacroExpansion = false;
MacroExpansionInDirectivesOverride = false;
InMacroArgs = false;
InMacroArgPreExpansion = false;
NumCachedTokenLexers = 0;
PragmasEnabled = true;
ParsingIfOrElifDirective = false;
PreprocessedOutput = false;
CachedLexPos = 0;
// We haven't read anything from the external source.
ReadMacrosFromExternalSource = false;
// "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
// This gets unpoisoned where it is allowed.
(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use);
// Initialize the pragma handlers.
RegisterBuiltinPragmas();
// Initialize builtin macros like __LINE__ and friends.
RegisterBuiltinMacros();
if (LangOpts.Borland) {
Ident__exception_info = getIdentifierInfo("_exception_info");
Ident___exception_info = getIdentifierInfo("__exception_info");
Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
Ident__exception_code = getIdentifierInfo("_exception_code");
Ident___exception_code = getIdentifierInfo("__exception_code");
Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
} else {
Ident__exception_info = Ident__exception_code = nullptr;
Ident__abnormal_termination = Ident___exception_info = nullptr;
Ident___exception_code = Ident___abnormal_termination = nullptr;
Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
Ident_AbnormalTermination = nullptr;
}
if (this->PPOpts->GeneratePreamble)
PreambleConditionalStack.startRecording();
}
Preprocessor::~Preprocessor() {
assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
IncludeMacroStack.clear();
// Destroy any macro definitions.
while (MacroInfoChain *I = MIChainHead) {
MIChainHead = I->Next;
I->~MacroInfoChain();
}
// Free any cached macro expanders.
// This populates MacroArgCache, so all TokenLexers need to be destroyed
// before the code below that frees up the MacroArgCache list.
std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
CurTokenLexer.reset();
// Free any cached MacroArgs.
for (MacroArgs *ArgList = MacroArgCache; ArgList;)
ArgList = ArgList->deallocate();
// Delete the header search info, if we own it.
if (OwnsHeaderSearch)
delete &HeaderInfo;
}
void Preprocessor::Initialize(const TargetInfo &Target,
const TargetInfo *AuxTarget) {
assert((!this->Target || this->Target == &Target) &&
"Invalid override of target information");
this->Target = &Target;
assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
"Invalid override of aux target information.");
this->AuxTarget = AuxTarget;
// Initialize information about built-ins.
BuiltinInfo.InitializeTarget(Target, AuxTarget);
HeaderInfo.setTarget(Target);
}
void Preprocessor::InitializeForModelFile() {
NumEnteredSourceFiles = 0;
// Reset pragmas
PragmaHandlersBackup = std::move(PragmaHandlers);
PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
RegisterBuiltinPragmas();
// Reset PredefinesFileID
PredefinesFileID = FileID();
}
void Preprocessor::FinalizeForModelFile() {
NumEnteredSourceFiles = 1;
PragmaHandlers = std::move(PragmaHandlersBackup);
}
void Preprocessor::setPTHManager(PTHManager* pm) {
PTH.reset(pm);
FileMgr.addStatCache(PTH->createStatCache());
}
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
<< getSpelling(Tok) << "'";
if (!DumpFlags) return;
llvm::errs() << "\t";
if (Tok.isAtStartOfLine())
llvm::errs() << " [StartOfLine]";
if (Tok.hasLeadingSpace())
llvm::errs() << " [LeadingSpace]";
if (Tok.isExpandDisabled())
llvm::errs() << " [ExpandDisabled]";
if (Tok.needsCleaning()) {
const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
<< "']";
}
llvm::errs() << "\tLoc=<";
DumpLocation(Tok.getLocation());
llvm::errs() << ">";
}
void Preprocessor::DumpLocation(SourceLocation Loc) const {
Loc.dump(SourceMgr);
}
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
llvm::errs() << "MACRO: ";
for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
DumpToken(MI.getReplacementToken(i));
llvm::errs() << " ";
}
llvm::errs() << "\n";
}
void Preprocessor::PrintStats() {
llvm::errs() << "\n*** Preprocessor Stats:\n";
llvm::errs() << NumDirectives << " directives found:\n";
llvm::errs() << " " << NumDefined << " #define.\n";
llvm::errs() << " " << NumUndefined << " #undef.\n";
llvm::errs() << " #include/#include_next/#import:\n";
llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
llvm::errs() << " " << NumElse << " #else/#elif.\n";
llvm::errs() << " " << NumEndif << " #endif.\n";
llvm::errs() << " " << NumPragma << " #pragma.\n";
llvm::errs() << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";
llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
<< NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
<< NumFastMacroExpanded << " on the fast path.\n";
llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
<< " token paste (##) operations performed, "
<< NumFastTokenPaste << " on the fast path.\n";
llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
llvm::errs() << "\n Macro Expanded Tokens: "
<< llvm::capacity_in_bytes(MacroExpandedTokens);
llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
// FIXME: List information for all submodules.
llvm::errs() << "\n Macros: "
<< llvm::capacity_in_bytes(CurSubmoduleState->Macros);
llvm::errs() << "\n #pragma push_macro Info: "
<< llvm::capacity_in_bytes(PragmaPushMacroInfo);
llvm::errs() << "\n Poison Reasons: "
<< llvm::capacity_in_bytes(PoisonReasons);
llvm::errs() << "\n Comment Handlers: "
<< llvm::capacity_in_bytes(CommentHandlers) << "\n";
}
Preprocessor::macro_iterator
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
if (IncludeExternalMacros && ExternalSource &&
!ReadMacrosFromExternalSource) {
ReadMacrosFromExternalSource = true;
ExternalSource->ReadDefinedMacros();
}
// Make sure we cover all macros in visible modules.
for (const ModuleMacro &Macro : ModuleMacros)
CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
return CurSubmoduleState->Macros.begin();
}
size_t Preprocessor::getTotalMemory() const {
return BP.getTotalMemory()
+ llvm::capacity_in_bytes(MacroExpandedTokens)
+ Predefines.capacity() /* Predefines buffer. */
// FIXME: Include sizes from all submodules, and include MacroInfo sizes,
// and ModuleMacros.
+ llvm::capacity_in_bytes(CurSubmoduleState->Macros)
+ llvm::capacity_in_bytes(PragmaPushMacroInfo)
+ llvm::capacity_in_bytes(PoisonReasons)
+ llvm::capacity_in_bytes(CommentHandlers);
}
Preprocessor::macro_iterator
Preprocessor::macro_end(bool IncludeExternalMacros) const {
if (IncludeExternalMacros && ExternalSource &&
!ReadMacrosFromExternalSource) {
ReadMacrosFromExternalSource = true;
ExternalSource->ReadDefinedMacros();
}
return CurSubmoduleState->Macros.end();
}
/// \brief Compares macro tokens with a specified token value sequence.
static bool MacroDefinitionEquals(const MacroInfo *MI,
ArrayRef<TokenValue> Tokens) {
return Tokens.size() == MI->getNumTokens() &&
std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
}
StringRef Preprocessor::getLastMacroWithSpelling(
SourceLocation Loc,
ArrayRef<TokenValue> Tokens) const {
SourceLocation BestLocation;
StringRef BestSpelling;
for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
I != E; ++I) {
const MacroDirective::DefInfo
Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
if (!Def || !Def.getMacroInfo())
continue;
if (!Def.getMacroInfo()->isObjectLike())
continue;
if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
continue;
SourceLocation Location = Def.getLocation();
// Choose the macro defined latest.
if (BestLocation.isInvalid() ||
(Location.isValid() &&
SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
BestLocation = Location;
BestSpelling = I->first->getName();
}
}
return BestSpelling;
}
void Preprocessor::recomputeCurLexerKind() {
if (CurLexer)
CurLexerKind = CLK_Lexer;
else if (CurPTHLexer)
CurLexerKind = CLK_PTHLexer;
else if (CurTokenLexer)
CurLexerKind = CLK_TokenLexer;
else
CurLexerKind = CLK_CachingLexer;
}
bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
unsigned CompleteLine,
unsigned CompleteColumn) {
assert(File);
assert(CompleteLine && CompleteColumn && "Starts from 1:1");
assert(!CodeCompletionFile && "Already set");
using llvm::MemoryBuffer;
// Load the actual file's contents.
bool Invalid = false;
const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
if (Invalid)
return true;
// Find the byte position of the truncation point.
const char *Position = Buffer->getBufferStart();
for (unsigned Line = 1; Line < CompleteLine; ++Line) {
for (; *Position; ++Position) {
if (*Position != '\r' && *Position != '\n')
continue;
// Eat \r\n or \n\r as a single line.
if ((Position[1] == '\r' || Position[1] == '\n') &&
Position[0] != Position[1])
++Position;
++Position;
break;
}
}
Position += CompleteColumn - 1;
// If pointing inside the preamble, adjust the position at the beginning of
// the file after the preamble.
if (SkipMainFilePreamble.first &&
SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
}
if (Position > Buffer->getBufferEnd())
Position = Buffer->getBufferEnd();
CodeCompletionFile = File;
CodeCompletionOffset = Position - Buffer->getBufferStart();
std::unique_ptr<MemoryBuffer> NewBuffer =
MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
Buffer->getBufferIdentifier());
char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
*NewPos = '\0';
std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
SourceMgr.overrideFileContents(File, std::move(NewBuffer));
return false;
}
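// Sketch of the resulting buffer: the override installed above is the
// original contents with a single '\0' spliced in at CodeCompletionOffset;
// the lexer later recognizes that NUL at the recorded offset as the
// code-completion point.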
void Preprocessor::CodeCompleteNaturalLanguage() {
if (CodeComplete)
CodeComplete->CodeCompleteNaturalLanguage();
setCodeCompletionReached();
}
/// getSpelling - This method is used to get the spelling of a token into a
/// SmallVector. Note that the returned StringRef may not point to the
/// supplied buffer if a copy can be avoided.
StringRef Preprocessor::getSpelling(const Token &Tok,
SmallVectorImpl<char> &Buffer,
bool *Invalid) const {
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
// Try the fast path.
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
return II->getName();
}
// Resize the buffer if we need to copy into it.
if (Tok.needsCleaning())
Buffer.resize(Tok.getLength());
const char *Ptr = Buffer.data();
unsigned Len = getSpelling(Tok, Ptr, Invalid);
return StringRef(Ptr, Len);
}
/// CreateString - Plop the specified string into a scratch buffer and return a
/// location for it. If specified, the source location provides a source
/// location for the token.
void Preprocessor::CreateString(StringRef Str, Token &Tok,
SourceLocation ExpansionLocStart,
SourceLocation ExpansionLocEnd) {
Tok.setLength(Str.size());
const char *DestPtr;
SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
if (ExpansionLocStart.isValid())
Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
ExpansionLocEnd, Str.size());
Tok.setLocation(Loc);
// If this is a raw identifier or a literal token, set the pointer data.
if (Tok.is(tok::raw_identifier))
Tok.setRawIdentifierData(DestPtr);
else if (Tok.isLiteral())
Tok.setLiteralData(DestPtr);
}
Module *Preprocessor::getCurrentModule() {
if (!getLangOpts().isCompilingModule())
return nullptr;
return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
}
//===----------------------------------------------------------------------===//
// Preprocessor Initialization Methods
//===----------------------------------------------------------------------===//
/// EnterMainSourceFile - Enter the specified FileID as the main source file,
/// which implicitly adds the builtin defines etc.
void Preprocessor::EnterMainSourceFile() {
// We do not allow the preprocessor to reenter the main file. Doing so will
// cause FileIDs to accumulate information from both runs (e.g. #line
// information) and predefined macros aren't guaranteed to be set properly.
assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
FileID MainFileID = SourceMgr.getMainFileID();
// If MainFileID is loaded it means we loaded an AST file, no need to enter
// a main file.
if (!SourceMgr.isLoadedFileID(MainFileID)) {
// Enter the main file source buffer.
EnterSourceFile(MainFileID, nullptr, SourceLocation());
// If we've been asked to skip bytes in the main file (e.g., as part of a
// precompiled preamble), do so now.
if (SkipMainFilePreamble.first > 0)
CurLexer->SkipBytes(SkipMainFilePreamble.first,
SkipMainFilePreamble.second);
// Tell the header info that the main file was entered. If the file is later
// #imported, it won't be re-entered.
if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
HeaderInfo.IncrementIncludeCount(FE);
}
// Preprocess Predefines to populate the initial preprocessor state.
std::unique_ptr<llvm::MemoryBuffer> SB =
llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
assert(SB && "Cannot create predefined source buffer");
FileID FID = SourceMgr.createFileID(std::move(SB));
assert(FID.isValid() && "Could not create FileID for predefines?");
setPredefinesFileID(FID);
// Start parsing the predefines.
EnterSourceFile(FID, nullptr, SourceLocation());
}
void Preprocessor::replayPreambleConditionalStack() {
// Restore the conditional stack from the preamble, if there is one.
if (PreambleConditionalStack.isReplaying()) {
+ assert(CurPPLexer &&
+ "CurPPLexer is null when calling replayPreambleConditionalStack.");
CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
PreambleConditionalStack.doneReplaying();
}
}
void Preprocessor::EndSourceFile() {
// Notify the client that we reached the end of the source file.
if (Callbacks)
Callbacks->EndOfMainFile();
}
//===----------------------------------------------------------------------===//
// Lexer Event Handling.
//===----------------------------------------------------------------------===//
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
// Look up this token, see if it is a macro, or if it is a language keyword.
IdentifierInfo *II;
if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
// No cleaning needed, just use the characters from the lexed buffer.
II = getIdentifierInfo(Identifier.getRawIdentifier());
} else {
// Cleaning needed: allocate a buffer, clean into it, then use the buffer.
SmallString<64> IdentifierBuffer;
StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
if (Identifier.hasUCN()) {
SmallString<64> UCNIdentifierBuffer;
expandUCNs(UCNIdentifierBuffer, CleanedStr);
II = getIdentifierInfo(UCNIdentifierBuffer);
} else {
II = getIdentifierInfo(CleanedStr);
}
}
// Update the token info (identifier info and appropriate token kind).
Identifier.setIdentifierInfo(II);
if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
getSourceManager().isInSystemHeader(Identifier.getLocation()))
Identifier.setKind(clang::tok::identifier);
else
Identifier.setKind(II->getTokenID());
return II;
}
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
PoisonReasons[II] = DiagID;
}
void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
assert(Ident__exception_code && Ident__exception_info);
assert(Ident___exception_code && Ident___exception_info);
Ident__exception_code->setIsPoisoned(Poison);
Ident___exception_code->setIsPoisoned(Poison);
Ident_GetExceptionCode->setIsPoisoned(Poison);
Ident__exception_info->setIsPoisoned(Poison);
Ident___exception_info->setIsPoisoned(Poison);
Ident_GetExceptionInfo->setIsPoisoned(Poison);
Ident__abnormal_termination->setIsPoisoned(Poison);
Ident___abnormal_termination->setIsPoisoned(Poison);
Ident_AbnormalTermination->setIsPoisoned(Poison);
}
void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
assert(Identifier.getIdentifierInfo() &&
"Can't handle identifiers without identifier info!");
llvm::DenseMap<IdentifierInfo *, unsigned>::const_iterator it =
PoisonReasons.find(Identifier.getIdentifierInfo());
if (it == PoisonReasons.end())
Diag(Identifier, diag::err_pp_used_poisoned_id);
else
Diag(Identifier, it->second) << Identifier.getIdentifierInfo();
}
/// \brief Returns a diagnostic message kind for reporting a future keyword as
/// appropriate for the identifier and specified language.
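/// For example, in C++98 mode the identifier "constexpr" is a
/// future-compatibility keyword and maps to diag::warn_cxx11_keyword here.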
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
const LangOptions &LangOpts) {
assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
if (LangOpts.CPlusPlus)
return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS) \
.Case(#NAME, diag::warn_cxx11_keyword)
#include "clang/Basic/TokenKinds.def"
;
llvm_unreachable(
"Keyword not known to come from a newer Standard or proposed Standard");
}
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
assert(II.isOutOfDate() && "not out of date");
getExternalSource()->updateOutOfDateIdentifier(II);
}
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier. This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
assert(Identifier.getIdentifierInfo() &&
"Can't handle identifiers without identifier info!");
IdentifierInfo &II = *Identifier.getIdentifierInfo();
// If the information about this identifier is out of date, update it from
// the external source.
// We have to treat __VA_ARGS__ in a special way, since it gets
// serialized with isPoisoned = true, but our preprocessor may have
// unpoisoned it if we're defining a C99 macro.
if (II.isOutOfDate()) {
bool CurrentIsPoisoned = false;
if (&II == Ident__VA_ARGS__)
CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
updateOutOfDateIdentifier(II);
Identifier.setKind(II.getTokenID());
if (&II == Ident__VA_ARGS__)
II.setIsPoisoned(CurrentIsPoisoned);
}
// If this identifier was poisoned, and if it was not produced from a macro
// expansion, emit an error.
if (II.isPoisoned() && CurPPLexer) {
HandlePoisonedIdentifier(Identifier);
}
// If this is a macro to be expanded, do it.
if (MacroDefinition MD = getMacroDefinition(&II)) {
auto *MI = MD.getMacroInfo();
assert(MI && "macro definition with no macro info?");
if (!DisableMacroExpansion) {
if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
// C99 6.10.3p10: If the preprocessing token immediately after the
// macro name isn't a '(', this macro should not be expanded.
if (!MI->isFunctionLike() || isNextPPTokenLParen())
return HandleMacroExpandedIdentifier(Identifier, MD);
} else {
// C99 6.10.3.4p2 says that a disabled macro may never again be
// expanded, even if it's in a context where it could be expanded in the
// future.
Identifier.setFlag(Token::DisableExpand);
if (MI->isObjectLike() || isNextPPTokenLParen())
Diag(Identifier, diag::pp_disabled_macro_expansion);
}
}
}
// If this identifier is a keyword in a newer Standard or proposed Standard,
// produce a warning. Don't warn if we're not considering macro expansion,
// since this identifier might be the name of a macro.
// FIXME: This warning is disabled in cases where it shouldn't be, like
// "#define constexpr constexpr", "int constexpr;"
if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
<< II.getName();
// Don't diagnose this keyword again in this translation unit.
II.setIsFutureCompatKeyword(false);
}
// If this is an extension token, diagnose its use.
// We avoid diagnosing tokens that originate from macro definitions.
// FIXME: This warning is disabled in cases where it shouldn't be,
// like "#define TY typeof", "TY(1) x".
if (II.isExtensionToken() && !DisableMacroExpansion)
Diag(Identifier, diag::ext_token_used);
// If this is the 'import' contextual keyword following an '@', note
// that the next token indicates a module name.
//
// Note that we do not treat 'import' as a contextual
// keyword when we're in a caching lexer, because caching lexers only get
// used in contexts where import declarations are disallowed.
//
// Likewise if this is the C++ Modules TS import keyword.
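//
// For example, the Objective-C directive "@import Foo.Bar;" reaches this
// point with LastTokenWasAt set and II spelled 'import'.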
if (((LastTokenWasAt && II.isModulesImport()) ||
Identifier.is(tok::kw_import)) &&
!InMacroArgs && !DisableMacroExpansion &&
(getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
CurLexerKind != CLK_CachingLexer) {
ModuleImportLoc = Identifier.getLocation();
ModuleImportPath.clear();
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
}
return true;
}
void Preprocessor::Lex(Token &Result) {
// We loop here until a lex function returns a token; this avoids recursion.
bool ReturnedToken;
do {
switch (CurLexerKind) {
case CLK_Lexer:
ReturnedToken = CurLexer->Lex(Result);
break;
case CLK_PTHLexer:
ReturnedToken = CurPTHLexer->Lex(Result);
break;
case CLK_TokenLexer:
ReturnedToken = CurTokenLexer->Lex(Result);
break;
case CLK_CachingLexer:
CachingLex(Result);
ReturnedToken = true;
break;
case CLK_LexAfterModuleImport:
LexAfterModuleImport(Result);
ReturnedToken = true;
break;
}
} while (!ReturnedToken);
if (Result.is(tok::code_completion))
setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
LastTokenWasAt = Result.is(tok::at);
}
/// \brief Lex a token following the 'import' contextual keyword.
///
void Preprocessor::LexAfterModuleImport(Token &Result) {
// Figure out what kind of lexer we actually have.
recomputeCurLexerKind();
// Lex the next token.
Lex(Result);
// The token sequence
//
// import identifier (. identifier)*
//
// indicates a module import directive. We already saw the 'import'
// contextual keyword, so now we're looking for the identifiers.
if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
// We expected to see an identifier here, and we did; continue handling
// identifiers.
ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
Result.getLocation()));
ModuleImportExpectsIdentifier = false;
CurLexerKind = CLK_LexAfterModuleImport;
return;
}
// If we're expecting a '.' or a ';', and we got a '.', then wait until we
// see the next identifier. (We can also see a '[[' that begins an
// attribute-specifier-seq here under the C++ Modules TS.)
if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
ModuleImportExpectsIdentifier = true;
CurLexerKind = CLK_LexAfterModuleImport;
return;
}
// If we have a non-empty module path, load the named module.
if (!ModuleImportPath.empty()) {
// Under the Modules TS, the dot is just part of the module name, and not
// a real hierarchy separator. Flatten such module names now.
//
// FIXME: Is this the right level to be performing this transformation?
std::string FlatModuleName;
if (getLangOpts().ModulesTS) {
for (auto &Piece : ModuleImportPath) {
if (!FlatModuleName.empty())
FlatModuleName += ".";
FlatModuleName += Piece.first->getName();
}
SourceLocation FirstPathLoc = ModuleImportPath[0].second;
ModuleImportPath.clear();
ModuleImportPath.push_back(
std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
}
Module *Imported = nullptr;
if (getLangOpts().Modules) {
Imported = TheModuleLoader.loadModule(ModuleImportLoc,
ModuleImportPath,
Module::Hidden,
/*IsIncludeDirective=*/false);
if (Imported)
makeModuleVisible(Imported, ModuleImportLoc);
}
if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
}
}
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
CurSubmoduleState->VisibleModules.setVisible(
M, Loc, [](Module *) {},
[&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
// FIXME: Include the path in the diagnostic.
// FIXME: Include the import location for the conflicting module.
Diag(ModuleImportLoc, diag::warn_module_conflict)
<< Path[0]->getFullModuleName()
<< Conflict->getFullModuleName()
<< Message;
});
// Add this module to the imports list of the currently-built submodule.
if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
BuildingSubmoduleStack.back().M->Imports.insert(M);
}
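/// For example, given the adjacent narrow literals "abc" "def", the tokens
/// are lexed (optionally macro-expanded), concatenated, and returned in
/// \p String as "abcdef".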
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
const char *DiagnosticTag,
bool AllowMacroExpansion) {
// We need at least one string literal.
if (Result.isNot(tok::string_literal)) {
Diag(Result, diag::err_expected_string_literal)
<< /*Source='in...'*/0 << DiagnosticTag;
return false;
}
// Lex string literal tokens, optionally with macro expansion.
SmallVector<Token, 4> StrToks;
do {
StrToks.push_back(Result);
if (Result.hasUDSuffix())
Diag(Result, diag::err_invalid_string_udl);
if (AllowMacroExpansion)
Lex(Result);
else
LexUnexpandedToken(Result);
} while (Result.is(tok::string_literal));
// Concatenate and parse the strings.
StringLiteralParser Literal(StrToks, *this);
assert(Literal.isAscii() && "Didn't allow wide strings in");
if (Literal.hadError)
return false;
if (Literal.Pascal) {
Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
<< /*Source='in...'*/0 << DiagnosticTag;
return false;
}
String = Literal.GetString();
return true;
}
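/// For example, a numeric-constant token spelled "0x2A" is consumed and
/// yields Value == 42; literals with user-defined suffixes are rejected.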
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
assert(Tok.is(tok::numeric_constant));
SmallString<8> IntegerBuffer;
bool NumberInvalid = false;
StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
if (NumberInvalid)
return false;
NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
return false;
llvm::APInt APVal(64, 0);
if (Literal.GetIntegerValue(APVal))
return false;
Lex(Tok);
Value = APVal.getLimitedValue();
return true;
}
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
assert(Handler && "NULL comment handler");
assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
CommentHandlers.end() && "Comment handler already registered");
CommentHandlers.push_back(Handler);
}
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
std::vector<CommentHandler *>::iterator Pos
= std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
assert(Pos != CommentHandlers.end() && "Comment handler not registered");
CommentHandlers.erase(Pos);
}
bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
bool AnyPendingTokens = false;
for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
HEnd = CommentHandlers.end();
H != HEnd; ++H) {
if ((*H)->HandleComment(*this, Comment))
AnyPendingTokens = true;
}
if (!AnyPendingTokens || getCommentRetentionState())
return false;
Lex(result);
return true;
}
ModuleLoader::~ModuleLoader() { }
CommentHandler::~CommentHandler() { }
CodeCompletionHandler::~CodeCompletionHandler() { }
void Preprocessor::createPreprocessingRecord() {
if (Record)
return;
Record = new PreprocessingRecord(getSourceManager());
addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
}
Index: head/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Parse/Parser.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Parse/Parser.cpp (revision 322855)
@@ -1,2248 +1,2246 @@
//===--- Parser.cpp - C Language Family Parser ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Parser interfaces.
//
//===----------------------------------------------------------------------===//
#include "clang/Parse/Parser.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/Parse/ParseDiagnostic.h"
#include "clang/Parse/RAIIObjectsForParser.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
using namespace clang;
namespace {
/// \brief A comment handler that passes comments found by the preprocessor
/// to the parser action.
class ActionCommentHandler : public CommentHandler {
Sema &S;
public:
explicit ActionCommentHandler(Sema &S) : S(S) { }
bool HandleComment(Preprocessor &PP, SourceRange Comment) override {
S.ActOnComment(Comment);
return false;
}
};
} // end anonymous namespace
IdentifierInfo *Parser::getSEHExceptKeyword() {
// __except is accepted as a (contextual) keyword
if (!Ident__except && (getLangOpts().MicrosoftExt || getLangOpts().Borland))
Ident__except = PP.getIdentifierInfo("__except");
return Ident__except;
}
Parser::Parser(Preprocessor &pp, Sema &actions, bool skipFunctionBodies)
: PP(pp), Actions(actions), Diags(PP.getDiagnostics()),
GreaterThanIsOperator(true), ColonIsSacred(false),
InMessageExpression(false), TemplateParameterDepth(0),
ParsingInObjCContainer(false) {
SkipFunctionBodies = pp.isCodeCompletionEnabled() || skipFunctionBodies;
Tok.startToken();
Tok.setKind(tok::eof);
Actions.CurScope = nullptr;
NumCachedScopes = 0;
CurParsedObjCImpl = nullptr;
// Add #pragma handlers. These are removed and destroyed in the
// destructor.
initializePragmaHandlers();
CommentSemaHandler.reset(new ActionCommentHandler(actions));
PP.addCommentHandler(CommentSemaHandler.get());
PP.setCodeCompletionHandler(*this);
}
DiagnosticBuilder Parser::Diag(SourceLocation Loc, unsigned DiagID) {
return Diags.Report(Loc, DiagID);
}
DiagnosticBuilder Parser::Diag(const Token &Tok, unsigned DiagID) {
return Diag(Tok.getLocation(), DiagID);
}
/// \brief Emits a diagnostic suggesting parentheses surrounding a
/// given range.
///
/// \param Loc The location where we'll emit the diagnostic.
/// \param DK The kind of diagnostic to emit.
/// \param ParenRange Source range enclosing code that should be parenthesized.
void Parser::SuggestParentheses(SourceLocation Loc, unsigned DK,
SourceRange ParenRange) {
SourceLocation EndLoc = PP.getLocForEndOfToken(ParenRange.getEnd());
if (!ParenRange.getEnd().isFileID() || EndLoc.isInvalid()) {
// We can't display the parentheses, so just emit the
// warning/error and return.
Diag(Loc, DK);
return;
}
Diag(Loc, DK)
<< FixItHint::CreateInsertion(ParenRange.getBegin(), "(")
<< FixItHint::CreateInsertion(EndLoc, ")");
}
static bool IsCommonTypo(tok::TokenKind ExpectedTok, const Token &Tok) {
switch (ExpectedTok) {
case tok::semi:
return Tok.is(tok::colon) || Tok.is(tok::comma); // : or , for ;
default: return false;
}
}
bool Parser::ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned DiagID,
StringRef Msg) {
if (Tok.is(ExpectedTok) || Tok.is(tok::code_completion)) {
ConsumeAnyToken();
return false;
}
// Detect common single-character typos and resume.
if (IsCommonTypo(ExpectedTok, Tok)) {
SourceLocation Loc = Tok.getLocation();
{
DiagnosticBuilder DB = Diag(Loc, DiagID);
DB << FixItHint::CreateReplacement(
SourceRange(Loc), tok::getPunctuatorSpelling(ExpectedTok));
if (DiagID == diag::err_expected)
DB << ExpectedTok;
else if (DiagID == diag::err_expected_after)
DB << Msg << ExpectedTok;
else
DB << Msg;
}
// Pretend there wasn't a problem.
ConsumeAnyToken();
return false;
}
SourceLocation EndLoc = PP.getLocForEndOfToken(PrevTokLocation);
const char *Spelling = nullptr;
if (EndLoc.isValid())
Spelling = tok::getPunctuatorSpelling(ExpectedTok);
DiagnosticBuilder DB =
Spelling
? Diag(EndLoc, DiagID) << FixItHint::CreateInsertion(EndLoc, Spelling)
: Diag(Tok, DiagID);
if (DiagID == diag::err_expected)
DB << ExpectedTok;
else if (DiagID == diag::err_expected_after)
DB << Msg << ExpectedTok;
else
DB << Msg;
return true;
}
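/// For example, given the (hypothetical) input "x = f() );", the stray ')'
/// before the ';' is diagnosed with a removal fix-it, both tokens are
/// consumed, and parsing continues as if the ';' alone had been written.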
bool Parser::ExpectAndConsumeSemi(unsigned DiagID) {
if (TryConsumeToken(tok::semi))
return false;
if (Tok.is(tok::code_completion)) {
handleUnexpectedCodeCompletionToken();
return false;
}
if ((Tok.is(tok::r_paren) || Tok.is(tok::r_square)) &&
NextToken().is(tok::semi)) {
Diag(Tok, diag::err_extraneous_token_before_semi)
<< PP.getSpelling(Tok)
<< FixItHint::CreateRemoval(Tok.getLocation());
ConsumeAnyToken(); // The ')' or ']'.
ConsumeToken(); // The ';'.
return false;
}
return ExpectAndConsume(tok::semi, DiagID);
}
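/// For example, after "struct S {};;" at namespace scope, the redundant
/// second ';' is consumed here and diagnosed (as an extension before C++11).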
void Parser::ConsumeExtraSemi(ExtraSemiKind Kind, unsigned TST) {
if (!Tok.is(tok::semi)) return;
bool HadMultipleSemis = false;
SourceLocation StartLoc = Tok.getLocation();
SourceLocation EndLoc = Tok.getLocation();
ConsumeToken();
while (Tok.is(tok::semi) && !Tok.isAtStartOfLine()) {
HadMultipleSemis = true;
EndLoc = Tok.getLocation();
ConsumeToken();
}
// C++11 allows extra semicolons at namespace scope, but not in any of the
// other contexts.
if (Kind == OutsideFunction && getLangOpts().CPlusPlus) {
if (getLangOpts().CPlusPlus11)
Diag(StartLoc, diag::warn_cxx98_compat_top_level_semi)
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
else
Diag(StartLoc, diag::ext_extra_semi_cxx11)
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
return;
}
if (Kind != AfterMemberFunctionDefinition || HadMultipleSemis)
Diag(StartLoc, diag::ext_extra_semi)
<< Kind << DeclSpec::getSpecifierName((DeclSpec::TST)TST,
Actions.getASTContext().getPrintingPolicy())
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
else
// A single semicolon is valid after a member function definition.
Diag(StartLoc, diag::warn_extra_semi_after_mem_fn_def)
<< FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc));
}
bool Parser::expectIdentifier() {
if (Tok.is(tok::identifier))
return false;
if (const auto *II = Tok.getIdentifierInfo()) {
if (II->isCPlusPlusKeyword(getLangOpts())) {
Diag(Tok, diag::err_expected_token_instead_of_objcxx_keyword)
<< tok::identifier << Tok.getIdentifierInfo();
// Objective-C++: Recover by treating this keyword as a valid identifier.
return false;
}
}
Diag(Tok, diag::err_expected) << tok::identifier;
return true;
}
//===----------------------------------------------------------------------===//
// Error recovery.
//===----------------------------------------------------------------------===//
static bool HasFlagsSet(Parser::SkipUntilFlags L, Parser::SkipUntilFlags R) {
return (static_cast<unsigned>(L) & static_cast<unsigned>(R)) != 0;
}
/// SkipUntil - Read tokens until we get to the specified token, then consume
/// it (unless the StopBeforeMatch flag is set). Because we cannot guarantee
/// that the token will ever occur, this skips to the next token, or to some
/// likely good stopping point. If the StopAtSemi flag is set, skipping will
/// stop at a ';' character.
///
/// If SkipUntil finds the specified token, it returns true, otherwise it
/// returns false.
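///
/// For example, SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch) scans
/// forward to the next ')' at the current nesting level, stops early at any
/// ';', and leaves the ')' itself unconsumed for the caller.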
bool Parser::SkipUntil(ArrayRef<tok::TokenKind> Toks, SkipUntilFlags Flags) {
// We always want this function to skip at least one token if the first token
// isn't T and if not at EOF.
bool isFirstTokenSkipped = true;
while (1) {
// If we found one of the tokens, stop and return true.
for (unsigned i = 0, NumToks = Toks.size(); i != NumToks; ++i) {
if (Tok.is(Toks[i])) {
if (HasFlagsSet(Flags, StopBeforeMatch)) {
// Noop, don't consume the token.
} else {
ConsumeAnyToken();
}
return true;
}
}
// Important special case: The caller has given up and just wants us to
// skip the rest of the file. Do this without recursing, since we can
// get here precisely because the caller detected too much recursion.
if (Toks.size() == 1 && Toks[0] == tok::eof &&
!HasFlagsSet(Flags, StopAtSemi) &&
!HasFlagsSet(Flags, StopAtCodeCompletion)) {
while (Tok.isNot(tok::eof))
ConsumeAnyToken();
return true;
}
switch (Tok.getKind()) {
case tok::eof:
// Ran out of tokens.
return false;
case tok::annot_pragma_openmp:
case tok::annot_pragma_openmp_end:
// Stop before an OpenMP pragma boundary.
case tok::annot_module_begin:
case tok::annot_module_end:
case tok::annot_module_include:
// Stop before we change submodules. They generally indicate a "good"
// place to pick up parsing again (except in the special case where
// we're trying to skip to EOF).
return false;
case tok::code_completion:
if (!HasFlagsSet(Flags, StopAtCodeCompletion))
handleUnexpectedCodeCompletionToken();
return false;
case tok::l_paren:
// Recursively skip properly-nested parens.
ConsumeParen();
if (HasFlagsSet(Flags, StopAtCodeCompletion))
SkipUntil(tok::r_paren, StopAtCodeCompletion);
else
SkipUntil(tok::r_paren);
break;
case tok::l_square:
// Recursively skip properly-nested square brackets.
ConsumeBracket();
if (HasFlagsSet(Flags, StopAtCodeCompletion))
SkipUntil(tok::r_square, StopAtCodeCompletion);
else
SkipUntil(tok::r_square);
break;
case tok::l_brace:
// Recursively skip properly-nested braces.
ConsumeBrace();
if (HasFlagsSet(Flags, StopAtCodeCompletion))
SkipUntil(tok::r_brace, StopAtCodeCompletion);
else
SkipUntil(tok::r_brace);
break;
// Okay, we found a ']' or '}' or ')', which we think should be balanced.
// Since the user wasn't looking for this token (if they were, it would
// already be handled), this isn't balanced. If there is a LHS token at a
// higher level, we will assume that this matches the unbalanced token
// and return it. Otherwise, this is a spurious RHS token, which we skip.
case tok::r_paren:
if (ParenCount && !isFirstTokenSkipped)
return false; // Matches something.
ConsumeParen();
break;
case tok::r_square:
if (BracketCount && !isFirstTokenSkipped)
return false; // Matches something.
ConsumeBracket();
break;
case tok::r_brace:
if (BraceCount && !isFirstTokenSkipped)
return false; // Matches something.
ConsumeBrace();
break;
case tok::semi:
if (HasFlagsSet(Flags, StopAtSemi))
return false;
// FALL THROUGH.
default:
// Skip this token.
ConsumeAnyToken();
break;
}
isFirstTokenSkipped = false;
}
}
//===----------------------------------------------------------------------===//
// Scope manipulation
//===----------------------------------------------------------------------===//
/// EnterScope - Start a new scope.
void Parser::EnterScope(unsigned ScopeFlags) {
if (NumCachedScopes) {
Scope *N = ScopeCache[--NumCachedScopes];
N->Init(getCurScope(), ScopeFlags);
Actions.CurScope = N;
} else {
Actions.CurScope = new Scope(getCurScope(), ScopeFlags, Diags);
}
}
/// ExitScope - Pop a scope off the scope stack.
void Parser::ExitScope() {
assert(getCurScope() && "Scope imbalance!");
// Inform the actions module that this scope is going away if there are any
// decls in it.
Actions.ActOnPopScope(Tok.getLocation(), getCurScope());
Scope *OldScope = getCurScope();
Actions.CurScope = OldScope->getParent();
if (NumCachedScopes == ScopeCacheSize)
delete OldScope;
else
ScopeCache[NumCachedScopes++] = OldScope;
}
/// Set the flags for the current scope to ScopeFlags. If ManageFlags is false,
/// this object does nothing.
Parser::ParseScopeFlags::ParseScopeFlags(Parser *Self, unsigned ScopeFlags,
bool ManageFlags)
: CurScope(ManageFlags ? Self->getCurScope() : nullptr) {
if (CurScope) {
OldFlags = CurScope->getFlags();
CurScope->setFlags(ScopeFlags);
}
}
/// Restore the flags for the current scope to what they were before this
/// object overrode them.
Parser::ParseScopeFlags::~ParseScopeFlags() {
if (CurScope)
CurScope->setFlags(OldFlags);
}
//===----------------------------------------------------------------------===//
// C99 6.9: External Definitions.
//===----------------------------------------------------------------------===//
Parser::~Parser() {
// If we still have scopes active, delete the scope tree.
delete getCurScope();
Actions.CurScope = nullptr;
// Free the scope cache.
for (unsigned i = 0, e = NumCachedScopes; i != e; ++i)
delete ScopeCache[i];
resetPragmaHandlers();
PP.removeCommentHandler(CommentSemaHandler.get());
PP.clearCodeCompletionHandler();
if (getLangOpts().DelayedTemplateParsing &&
!PP.isIncrementalProcessingEnabled() && !TemplateIds.empty()) {
// If an ASTConsumer parsed delay-parsed templates in their
// HandleTranslationUnit() method, TemplateIds created there were not
// guarded by a DestroyTemplateIdAnnotationsRAIIObj object in
// ParseTopLevelDecl(). Destroy them here.
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
}
assert(TemplateIds.empty() && "Still alive TemplateIdAnnotations around?");
}
/// Initialize - Warm up the parser.
///
void Parser::Initialize() {
// Create the translation unit scope. Install it as the current scope.
assert(getCurScope() == nullptr && "A scope is already active?");
EnterScope(Scope::DeclScope);
Actions.ActOnTranslationUnitScope(getCurScope());
// Initialization for Objective-C context sensitive keywords recognition.
// Referenced in Parser::ParseObjCTypeQualifierList.
if (getLangOpts().ObjC1) {
ObjCTypeQuals[objc_in] = &PP.getIdentifierTable().get("in");
ObjCTypeQuals[objc_out] = &PP.getIdentifierTable().get("out");
ObjCTypeQuals[objc_inout] = &PP.getIdentifierTable().get("inout");
ObjCTypeQuals[objc_oneway] = &PP.getIdentifierTable().get("oneway");
ObjCTypeQuals[objc_bycopy] = &PP.getIdentifierTable().get("bycopy");
ObjCTypeQuals[objc_byref] = &PP.getIdentifierTable().get("byref");
ObjCTypeQuals[objc_nonnull] = &PP.getIdentifierTable().get("nonnull");
ObjCTypeQuals[objc_nullable] = &PP.getIdentifierTable().get("nullable");
ObjCTypeQuals[objc_null_unspecified]
= &PP.getIdentifierTable().get("null_unspecified");
}
Ident_instancetype = nullptr;
Ident_final = nullptr;
Ident_sealed = nullptr;
Ident_override = nullptr;
Ident_GNU_final = nullptr;
Ident_super = &PP.getIdentifierTable().get("super");
Ident_vector = nullptr;
Ident_bool = nullptr;
Ident_pixel = nullptr;
if (getLangOpts().AltiVec || getLangOpts().ZVector) {
Ident_vector = &PP.getIdentifierTable().get("vector");
Ident_bool = &PP.getIdentifierTable().get("bool");
}
if (getLangOpts().AltiVec)
Ident_pixel = &PP.getIdentifierTable().get("pixel");
Ident_introduced = nullptr;
Ident_deprecated = nullptr;
Ident_obsoleted = nullptr;
Ident_unavailable = nullptr;
Ident_strict = nullptr;
Ident_replacement = nullptr;
Ident_language = Ident_defined_in = Ident_generated_declaration = nullptr;
Ident__except = nullptr;
Ident__exception_code = Ident__exception_info = nullptr;
Ident__abnormal_termination = Ident___exception_code = nullptr;
Ident___exception_info = Ident___abnormal_termination = nullptr;
Ident_GetExceptionCode = Ident_GetExceptionInfo = nullptr;
Ident_AbnormalTermination = nullptr;
if (getLangOpts().Borland) {
Ident__exception_info = PP.getIdentifierInfo("_exception_info");
Ident___exception_info = PP.getIdentifierInfo("__exception_info");
Ident_GetExceptionInfo = PP.getIdentifierInfo("GetExceptionInformation");
Ident__exception_code = PP.getIdentifierInfo("_exception_code");
Ident___exception_code = PP.getIdentifierInfo("__exception_code");
Ident_GetExceptionCode = PP.getIdentifierInfo("GetExceptionCode");
Ident__abnormal_termination = PP.getIdentifierInfo("_abnormal_termination");
Ident___abnormal_termination = PP.getIdentifierInfo("__abnormal_termination");
Ident_AbnormalTermination = PP.getIdentifierInfo("AbnormalTermination");
PP.SetPoisonReason(Ident__exception_code, diag::err_seh___except_block);
PP.SetPoisonReason(Ident___exception_code, diag::err_seh___except_block);
PP.SetPoisonReason(Ident_GetExceptionCode, diag::err_seh___except_block);
PP.SetPoisonReason(Ident__exception_info, diag::err_seh___except_filter);
PP.SetPoisonReason(Ident___exception_info, diag::err_seh___except_filter);
PP.SetPoisonReason(Ident_GetExceptionInfo, diag::err_seh___except_filter);
PP.SetPoisonReason(Ident__abnormal_termination, diag::err_seh___finally_block);
PP.SetPoisonReason(Ident___abnormal_termination, diag::err_seh___finally_block);
PP.SetPoisonReason(Ident_AbnormalTermination, diag::err_seh___finally_block);
}
Actions.Initialize();
// Prime the lexer look-ahead.
ConsumeToken();
-
- PP.replayPreambleConditionalStack();
}
void Parser::LateTemplateParserCleanupCallback(void *P) {
// While this RAII helper doesn't bracket any actual work, the destructor will
// clean up annotations that were created during ActOnEndOfTranslationUnit
// when incremental processing is enabled.
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(((Parser *)P)->TemplateIds);
}
bool Parser::ParseFirstTopLevelDecl(DeclGroupPtrTy &Result) {
Actions.ActOnStartOfTranslationUnit();
// C11 6.9p1 says translation units must have at least one top-level
// declaration. C++ doesn't have this restriction. We also don't want to
// complain if we have a precompiled header, although technically if the PCH
// is empty we should still emit the (pedantic) diagnostic.
bool NoTopLevelDecls = ParseTopLevelDecl(Result);
if (NoTopLevelDecls && !Actions.getASTContext().getExternalSource() &&
!getLangOpts().CPlusPlus)
Diag(diag::ext_empty_translation_unit);
return NoTopLevelDecls;
}
/// ParseTopLevelDecl - Parse one top-level declaration, return whatever the
/// action tells us to. This returns true if the EOF was encountered.
bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result) {
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
// Skip over the EOF token, flagging end of previous input for incremental
// processing.
if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof))
ConsumeToken();
Result = nullptr;
switch (Tok.getKind()) {
case tok::annot_pragma_unused:
HandlePragmaUnused();
return false;
case tok::kw_import:
Result = ParseModuleImport(SourceLocation());
return false;
case tok::kw_export:
if (NextToken().isNot(tok::kw_module))
break;
LLVM_FALLTHROUGH;
case tok::kw_module:
Result = ParseModuleDecl();
return false;
case tok::annot_module_include:
Actions.ActOnModuleInclude(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
return false;
case tok::annot_module_begin:
Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
return false;
case tok::annot_module_end:
Actions.ActOnModuleEnd(Tok.getLocation(), reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
return false;
case tok::annot_pragma_attribute:
HandlePragmaAttribute();
return false;
case tok::eof:
// Late template parsing can begin.
if (getLangOpts().DelayedTemplateParsing)
Actions.SetLateTemplateParser(LateTemplateParserCallback,
PP.isIncrementalProcessingEnabled() ?
LateTemplateParserCleanupCallback : nullptr,
this);
if (!PP.isIncrementalProcessingEnabled())
Actions.ActOnEndOfTranslationUnit();
// else don't tell Sema that we ended parsing: more input might come.
return true;
default:
break;
}
ParsedAttributesWithRange attrs(AttrFactory);
MaybeParseCXX11Attributes(attrs);
Result = ParseExternalDeclaration(attrs);
return false;
}
/// ParseExternalDeclaration:
///
/// external-declaration: [C99 6.9], declaration: [C++ dcl.dcl]
/// function-definition
/// declaration
/// [GNU] asm-definition
/// [GNU] __extension__ external-declaration
/// [OBJC] objc-class-definition
/// [OBJC] objc-class-declaration
/// [OBJC] objc-alias-declaration
/// [OBJC] objc-protocol-definition
/// [OBJC] objc-method-definition
/// [OBJC] @end
/// [C++] linkage-specification
/// [GNU] asm-definition:
/// simple-asm-expr ';'
/// [C++11] empty-declaration
/// [C++11] attribute-declaration
///
/// [C++11] empty-declaration:
/// ';'
///
/// [C++0x/GNU] 'extern' 'template' declaration
Parser::DeclGroupPtrTy
Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
ParsingDeclSpec *DS) {
DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
ParenBraceBracketBalancer BalancerRAIIObj(*this);
if (PP.isCodeCompletionReached()) {
cutOffParsing();
return nullptr;
}
Decl *SingleDecl = nullptr;
switch (Tok.getKind()) {
case tok::annot_pragma_vis:
HandlePragmaVisibility();
return nullptr;
case tok::annot_pragma_pack:
HandlePragmaPack();
return nullptr;
case tok::annot_pragma_msstruct:
HandlePragmaMSStruct();
return nullptr;
case tok::annot_pragma_align:
HandlePragmaAlign();
return nullptr;
case tok::annot_pragma_weak:
HandlePragmaWeak();
return nullptr;
case tok::annot_pragma_weakalias:
HandlePragmaWeakAlias();
return nullptr;
case tok::annot_pragma_redefine_extname:
HandlePragmaRedefineExtname();
return nullptr;
case tok::annot_pragma_fp_contract:
HandlePragmaFPContract();
return nullptr;
case tok::annot_pragma_fp:
HandlePragmaFP();
break;
case tok::annot_pragma_opencl_extension:
HandlePragmaOpenCLExtension();
return nullptr;
case tok::annot_pragma_openmp: {
AccessSpecifier AS = AS_none;
return ParseOpenMPDeclarativeDirectiveWithExtDecl(AS, attrs);
}
case tok::annot_pragma_ms_pointers_to_members:
HandlePragmaMSPointersToMembers();
return nullptr;
case tok::annot_pragma_ms_vtordisp:
HandlePragmaMSVtorDisp();
return nullptr;
case tok::annot_pragma_ms_pragma:
HandlePragmaMSPragma();
return nullptr;
case tok::annot_pragma_dump:
HandlePragmaDump();
return nullptr;
case tok::semi:
// Either a C++11 empty-declaration or attribute-declaration.
SingleDecl = Actions.ActOnEmptyDeclaration(getCurScope(),
attrs.getList(),
Tok.getLocation());
ConsumeExtraSemi(OutsideFunction);
break;
case tok::r_brace:
Diag(Tok, diag::err_extraneous_closing_brace);
ConsumeBrace();
return nullptr;
case tok::eof:
Diag(Tok, diag::err_expected_external_declaration);
return nullptr;
case tok::kw___extension__: {
// __extension__ silences extension warnings in the subexpression.
ExtensionRAIIObject O(Diags); // Use RAII to do this.
ConsumeToken();
return ParseExternalDeclaration(attrs);
}
case tok::kw_asm: {
ProhibitAttributes(attrs);
SourceLocation StartLoc = Tok.getLocation();
SourceLocation EndLoc;
ExprResult Result(ParseSimpleAsm(&EndLoc));
// Check if GNU-style InlineAsm is disabled.
// Empty asm string is allowed because it will not introduce
// any assembly code.
if (!(getLangOpts().GNUAsm || Result.isInvalid())) {
const auto *SL = cast<StringLiteral>(Result.get());
if (!SL->getString().trim().empty())
Diag(StartLoc, diag::err_gnu_inline_asm_disabled);
}
ExpectAndConsume(tok::semi, diag::err_expected_after,
"top-level asm block");
if (Result.isInvalid())
return nullptr;
SingleDecl = Actions.ActOnFileScopeAsmDecl(Result.get(), StartLoc, EndLoc);
break;
}
case tok::at:
return ParseObjCAtDirectives();
case tok::minus:
case tok::plus:
if (!getLangOpts().ObjC1) {
Diag(Tok, diag::err_expected_external_declaration);
ConsumeToken();
return nullptr;
}
SingleDecl = ParseObjCMethodDefinition();
break;
case tok::code_completion:
Actions.CodeCompleteOrdinaryName(getCurScope(),
CurParsedObjCImpl? Sema::PCC_ObjCImplementation
: Sema::PCC_Namespace);
cutOffParsing();
return nullptr;
case tok::kw_export:
if (getLangOpts().ModulesTS) {
SingleDecl = ParseExportDeclaration();
break;
}
// This must be 'export template'. Parse it so we can diagnose our lack
// of support.
LLVM_FALLTHROUGH;
case tok::kw_using:
case tok::kw_namespace:
case tok::kw_typedef:
case tok::kw_template:
case tok::kw_static_assert:
case tok::kw__Static_assert:
// A function definition cannot start with any of these keywords.
{
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
case tok::kw_static:
// Parse (then ignore) 'static' prior to a template instantiation. This is
// a GCC extension that we intentionally do not support.
if (getLangOpts().CPlusPlus && NextToken().is(tok::kw_template)) {
Diag(ConsumeToken(), diag::warn_static_inline_explicit_inst_ignored)
<< 0;
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
goto dont_know;
case tok::kw_inline:
if (getLangOpts().CPlusPlus) {
tok::TokenKind NextKind = NextToken().getKind();
// Inline namespaces. Allowed as an extension even in C++03.
if (NextKind == tok::kw_namespace) {
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
// Parse (then ignore) 'inline' prior to a template instantiation. This is
// a GCC extension that we intentionally do not support.
if (NextKind == tok::kw_template) {
Diag(ConsumeToken(), diag::warn_static_inline_explicit_inst_ignored)
<< 1;
SourceLocation DeclEnd;
return ParseDeclaration(Declarator::FileContext, DeclEnd, attrs);
}
}
goto dont_know;
case tok::kw_extern:
if (getLangOpts().CPlusPlus && NextToken().is(tok::kw_template)) {
// Extern templates
SourceLocation ExternLoc = ConsumeToken();
SourceLocation TemplateLoc = ConsumeToken();
Diag(ExternLoc, getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_extern_template :
diag::ext_extern_template) << SourceRange(ExternLoc, TemplateLoc);
SourceLocation DeclEnd;
return Actions.ConvertDeclToDeclGroup(
ParseExplicitInstantiation(Declarator::FileContext,
ExternLoc, TemplateLoc, DeclEnd));
}
goto dont_know;
case tok::kw___if_exists:
case tok::kw___if_not_exists:
ParseMicrosoftIfExistsExternalDeclaration();
return nullptr;
case tok::kw_module:
Diag(Tok, diag::err_unexpected_module_decl);
SkipUntil(tok::semi);
return nullptr;
default:
dont_know:
if (Tok.isEditorPlaceholder()) {
ConsumeToken();
return nullptr;
}
// We can't tell whether this is a function-definition or declaration yet.
return ParseDeclarationOrFunctionDefinition(attrs, DS);
}
// This routine returns a DeclGroup, if the thing we parsed only contains a
// single decl, convert it now.
return Actions.ConvertDeclToDeclGroup(SingleDecl);
}
/// \brief Determine whether the current token, if it occurs after a
/// declarator, continues a declaration or declaration list.
bool Parser::isDeclarationAfterDeclarator() {
// Check for '= delete' or '= default'
if (getLangOpts().CPlusPlus && Tok.is(tok::equal)) {
const Token &KW = NextToken();
if (KW.is(tok::kw_default) || KW.is(tok::kw_delete))
return false;
}
return Tok.is(tok::equal) || // int X()= -> not a function def
Tok.is(tok::comma) || // int X(), -> not a function def
Tok.is(tok::semi) || // int X(); -> not a function def
Tok.is(tok::kw_asm) || // int X() __asm__ -> not a function def
Tok.is(tok::kw___attribute) || // int X() __attr__ -> not a function def
(getLangOpts().CPlusPlus &&
Tok.is(tok::l_paren)); // int X(0) -> not a function def [C++]
}
/// \brief Determine whether the current token, if it occurs after a
/// declarator, indicates the start of a function definition.
bool Parser::isStartOfFunctionDefinition(const ParsingDeclarator &Declarator) {
assert(Declarator.isFunctionDeclarator() && "Isn't a function declarator");
if (Tok.is(tok::l_brace)) // int X() {}
return true;
// Handle K&R C argument lists: int X(f) int f; {}
if (!getLangOpts().CPlusPlus &&
Declarator.getFunctionTypeInfo().isKNRPrototype())
return isDeclarationSpecifier();
if (getLangOpts().CPlusPlus && Tok.is(tok::equal)) {
const Token &KW = NextToken();
return KW.is(tok::kw_default) || KW.is(tok::kw_delete);
}
return Tok.is(tok::colon) || // X() : Base() {} (used for ctors)
Tok.is(tok::kw_try); // X() try { ... }
}
/// Parse either a function-definition or a declaration. We can't tell which
/// we have until we read up to the compound-statement in function-definition.
/// TemplateParams, if non-NULL, provides the template parameters when we're
/// parsing a C++ template-declaration.
///
/// function-definition: [C99 6.9.1]
/// decl-specs declarator declaration-list[opt] compound-statement
/// [C90] function-definition: [C99 6.7.1] - implicit int result
/// [C90] decl-specs[opt] declarator declaration-list[opt] compound-statement
///
/// declaration: [C99 6.7]
/// declaration-specifiers init-declarator-list[opt] ';'
/// [!C99] init-declarator-list ';' [TODO: warn in c99 mode]
/// [OMP] threadprivate-directive [TODO]
///
Parser::DeclGroupPtrTy
Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs,
ParsingDeclSpec &DS,
AccessSpecifier AS) {
MaybeParseMicrosoftAttributes(DS.getAttributes());
// Parse the common declaration-specifiers piece.
ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS, DSC_top_level);
// If we had a free-standing type definition with a missing semicolon, we
// may get this far before the problem becomes obvious.
if (DS.hasTagDefinition() &&
DiagnoseMissingSemiAfterTagDefinition(DS, AS, DSC_top_level))
return nullptr;
// C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
// declaration-specifiers init-declarator-list[opt] ';'
if (Tok.is(tok::semi)) {
ProhibitAttributes(attrs);
ConsumeToken();
RecordDecl *AnonRecord = nullptr;
Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
DS, AnonRecord);
DS.complete(TheDecl);
if (getLangOpts().OpenCL)
Actions.setCurrentOpenCLExtensionForDecl(TheDecl);
if (AnonRecord) {
Decl* decls[] = {AnonRecord, TheDecl};
return Actions.BuildDeclaratorGroup(decls);
}
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
DS.takeAttributesFrom(attrs);
// ObjC2 allows prefix attributes on class interfaces and protocols.
// FIXME: This still needs better diagnostics. We should only accept
// attributes here, no types, etc.
if (getLangOpts().ObjC2 && Tok.is(tok::at)) {
SourceLocation AtLoc = ConsumeToken(); // the "@"
if (!Tok.isObjCAtKeyword(tok::objc_interface) &&
!Tok.isObjCAtKeyword(tok::objc_protocol)) {
Diag(Tok, diag::err_objc_unexpected_attr);
SkipUntil(tok::semi); // FIXME: better skip?
return nullptr;
}
DS.abort();
const char *PrevSpec = nullptr;
unsigned DiagID;
if (DS.SetTypeSpecType(DeclSpec::TST_unspecified, AtLoc, PrevSpec, DiagID,
Actions.getASTContext().getPrintingPolicy()))
Diag(AtLoc, DiagID) << PrevSpec;
if (Tok.isObjCAtKeyword(tok::objc_protocol))
return ParseObjCAtProtocolDeclaration(AtLoc, DS.getAttributes());
return Actions.ConvertDeclToDeclGroup(
ParseObjCAtInterfaceDeclaration(AtLoc, DS.getAttributes()));
}
// If the declspec consisted only of 'extern' and we have a string
// literal following it, this must be a C++ linkage specifier like
// 'extern "C"'.
if (getLangOpts().CPlusPlus && isTokenStringLiteral() &&
DS.getStorageClassSpec() == DeclSpec::SCS_extern &&
DS.getParsedSpecifiers() == DeclSpec::PQ_StorageClassSpecifier) {
Decl *TheDecl = ParseLinkage(DS, Declarator::FileContext);
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
return ParseDeclGroup(DS, Declarator::FileContext);
}
Parser::DeclGroupPtrTy
Parser::ParseDeclarationOrFunctionDefinition(ParsedAttributesWithRange &attrs,
ParsingDeclSpec *DS,
AccessSpecifier AS) {
if (DS) {
return ParseDeclOrFunctionDefInternal(attrs, *DS, AS);
} else {
ParsingDeclSpec PDS(*this);
// Must temporarily exit the Objective-C container scope for
// parsing C constructs and re-enter the Objective-C container scope
// afterwards.
ObjCDeclContextSwitch ObjCDC(*this);
return ParseDeclOrFunctionDefInternal(attrs, PDS, AS);
}
}
/// ParseFunctionDefinition - We parsed and verified that the specified
/// Declarator is well formed. If this is a K&R-style function, read the
/// parameters declaration-list, then start the compound-statement.
///
/// function-definition: [C99 6.9.1]
/// decl-specs declarator declaration-list[opt] compound-statement
/// [C90] function-definition: [C99 6.7.1] - implicit int result
/// [C90] decl-specs[opt] declarator declaration-list[opt] compound-statement
/// [C++] function-definition: [C++ 8.4]
/// decl-specifier-seq[opt] declarator ctor-initializer[opt]
/// function-body
/// [C++] function-definition: [C++ 8.4]
/// decl-specifier-seq[opt] declarator function-try-block
///
Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
const ParsedTemplateInfo &TemplateInfo,
LateParsedAttrList *LateParsedAttrs) {
// Poison SEH identifiers so they are flagged as illegal in function bodies.
PoisonSEHIdentifiersRAIIObject PoisonSEHIdentifiers(*this, true);
const DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
// If this is C90 and the declspecs were completely missing, fudge in an
// implicit int. We do this here because this is the only place where
// declaration-specifiers are completely optional in the grammar.
if (getLangOpts().ImplicitInt && D.getDeclSpec().isEmpty()) {
const char *PrevSpec;
unsigned DiagID;
const PrintingPolicy &Policy = Actions.getASTContext().getPrintingPolicy();
D.getMutableDeclSpec().SetTypeSpecType(DeclSpec::TST_int,
D.getIdentifierLoc(),
PrevSpec, DiagID,
Policy);
D.SetRangeBegin(D.getDeclSpec().getSourceRange().getBegin());
}
// If this declaration was formed with a K&R-style identifier list for the
// arguments, parse declarations for all of the args next.
// int foo(a,b) int a; float b; {}
if (FTI.isKNRPrototype())
ParseKNRParamDeclarations(D);
// We should have either an opening brace or, in a C++ constructor,
// we may have a colon.
if (Tok.isNot(tok::l_brace) &&
(!getLangOpts().CPlusPlus ||
(Tok.isNot(tok::colon) && Tok.isNot(tok::kw_try) &&
Tok.isNot(tok::equal)))) {
Diag(Tok, diag::err_expected_fn_body);
// Skip over garbage, until we get to '{'. Don't eat the '{'.
SkipUntil(tok::l_brace, StopAtSemi | StopBeforeMatch);
// If we didn't find the '{', bail out.
if (Tok.isNot(tok::l_brace))
return nullptr;
}
// Check to make sure that any normal attributes are allowed to be on
// a definition. Late parsed attributes are checked at the end.
if (Tok.isNot(tok::equal)) {
AttributeList *DtorAttrs = D.getAttributes();
while (DtorAttrs) {
if (DtorAttrs->isKnownToGCC() &&
!DtorAttrs->isCXX11Attribute()) {
Diag(DtorAttrs->getLoc(), diag::warn_attribute_on_function_definition)
<< DtorAttrs->getName();
}
DtorAttrs = DtorAttrs->getNext();
}
}
// In delayed template parsing mode, for function template we consume the
// tokens and store them for late parsing at the end of the translation unit.
if (getLangOpts().DelayedTemplateParsing && Tok.isNot(tok::equal) &&
TemplateInfo.Kind == ParsedTemplateInfo::Template &&
Actions.canDelayFunctionBody(D)) {
MultiTemplateParamsArg TemplateParameterLists(*TemplateInfo.TemplateParams);
ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope);
Scope *ParentScope = getCurScope()->getParent();
D.setFunctionDefinitionKind(FDK_Definition);
Decl *DP = Actions.HandleDeclarator(ParentScope, D,
TemplateParameterLists);
D.complete(DP);
D.getMutableDeclSpec().abort();
if (SkipFunctionBodies && (!DP || Actions.canSkipFunctionBody(DP)) &&
trySkippingFunctionBody()) {
BodyScope.Exit();
return Actions.ActOnSkippedFunctionBody(DP);
}
CachedTokens Toks;
LexTemplateFunctionForLateParsing(Toks);
if (DP) {
FunctionDecl *FnD = DP->getAsFunction();
Actions.CheckForFunctionRedefinition(FnD);
Actions.MarkAsLateParsedTemplate(FnD, DP, Toks);
}
return DP;
}
else if (CurParsedObjCImpl &&
!TemplateInfo.TemplateParams &&
(Tok.is(tok::l_brace) || Tok.is(tok::kw_try) ||
Tok.is(tok::colon)) &&
Actions.CurContext->isTranslationUnit()) {
ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope);
Scope *ParentScope = getCurScope()->getParent();
D.setFunctionDefinitionKind(FDK_Definition);
Decl *FuncDecl = Actions.HandleDeclarator(ParentScope, D,
MultiTemplateParamsArg());
D.complete(FuncDecl);
D.getMutableDeclSpec().abort();
if (FuncDecl) {
// Consume the tokens and store them for later parsing.
StashAwayMethodOrFunctionBodyTokens(FuncDecl);
CurParsedObjCImpl->HasCFunction = true;
return FuncDecl;
}
// FIXME: Should we really fall through here?
}
// Enter a scope for the function body.
ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope);
// Tell the actions module that we have entered a function definition with the
// specified Declarator for the function.
Sema::SkipBodyInfo SkipBody;
Decl *Res = Actions.ActOnStartOfFunctionDef(getCurScope(), D,
TemplateInfo.TemplateParams
? *TemplateInfo.TemplateParams
: MultiTemplateParamsArg(),
&SkipBody);
if (SkipBody.ShouldSkip) {
SkipFunctionBody();
return Res;
}
// Break out of the ParsingDeclarator context before we parse the body.
D.complete(Res);
// Break out of the ParsingDeclSpec context, too. This const_cast is
// safe because we're always the sole owner.
D.getMutableDeclSpec().abort();
if (TryConsumeToken(tok::equal)) {
assert(getLangOpts().CPlusPlus && "Only C++ function definitions have '='");
bool Delete = false;
SourceLocation KWLoc;
if (TryConsumeToken(tok::kw_delete, KWLoc)) {
Diag(KWLoc, getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_defaulted_deleted_function
: diag::ext_defaulted_deleted_function)
<< 1 /* deleted */;
Actions.SetDeclDeleted(Res, KWLoc);
Delete = true;
} else if (TryConsumeToken(tok::kw_default, KWLoc)) {
Diag(KWLoc, getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_defaulted_deleted_function
: diag::ext_defaulted_deleted_function)
<< 0 /* defaulted */;
Actions.SetDeclDefaulted(Res, KWLoc);
} else {
llvm_unreachable("function definition after = not 'delete' or 'default'");
}
if (Tok.is(tok::comma)) {
Diag(KWLoc, diag::err_default_delete_in_multiple_declaration)
<< Delete;
SkipUntil(tok::semi);
} else if (ExpectAndConsume(tok::semi, diag::err_expected_after,
Delete ? "delete" : "default")) {
SkipUntil(tok::semi);
}
Stmt *GeneratedBody = Res ? Res->getBody() : nullptr;
Actions.ActOnFinishFunctionBody(Res, GeneratedBody, false);
return Res;
}
if (SkipFunctionBodies && (!Res || Actions.canSkipFunctionBody(Res)) &&
trySkippingFunctionBody()) {
BodyScope.Exit();
Actions.ActOnSkippedFunctionBody(Res);
return Actions.ActOnFinishFunctionBody(Res, nullptr, false);
}
if (Tok.is(tok::kw_try))
return ParseFunctionTryBlock(Res, BodyScope);
// If we have a colon, then we're probably parsing a C++
// ctor-initializer.
if (Tok.is(tok::colon)) {
ParseConstructorInitializer(Res);
// Recover from error.
if (!Tok.is(tok::l_brace)) {
BodyScope.Exit();
Actions.ActOnFinishFunctionBody(Res, nullptr);
return Res;
}
} else
Actions.ActOnDefaultCtorInitializers(Res);
// Late attributes are parsed in the same scope as the function body.
if (LateParsedAttrs)
ParseLexedAttributeList(*LateParsedAttrs, Res, false, true);
return ParseFunctionStatementBody(Res, BodyScope);
}
void Parser::SkipFunctionBody() {
if (Tok.is(tok::equal)) {
SkipUntil(tok::semi);
return;
}
bool IsFunctionTryBlock = Tok.is(tok::kw_try);
if (IsFunctionTryBlock)
ConsumeToken();
CachedTokens Skipped;
if (ConsumeAndStoreFunctionPrologue(Skipped))
SkipMalformedDecl();
else {
SkipUntil(tok::r_brace);
while (IsFunctionTryBlock && Tok.is(tok::kw_catch)) {
SkipUntil(tok::l_brace);
SkipUntil(tok::r_brace);
}
}
}
/// ParseKNRParamDeclarations - Parse 'declaration-list[opt]' which provides
/// types for a function with a K&R-style identifier list for arguments.
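///
/// For example, for "int f(a, b) int a; float b; { ... }" this parses the
/// declaration list "int a; float b;" and attaches each type to the matching
/// identifier in f's parameter list.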
void Parser::ParseKNRParamDeclarations(Declarator &D) {
// We know that the top-level of this declarator is a function.
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
// Enter function-declaration scope, limiting any declarators to the
// function prototype scope, including parameter declarators.
ParseScope PrototypeScope(this, Scope::FunctionPrototypeScope |
Scope::FunctionDeclarationScope | Scope::DeclScope);
// Read all the argument declarations.
while (isDeclarationSpecifier()) {
SourceLocation DSStart = Tok.getLocation();
// Parse the common declaration-specifiers piece.
DeclSpec DS(AttrFactory);
ParseDeclarationSpecifiers(DS);
// C99 6.9.1p6: 'each declaration in the declaration list shall have at
// least one declarator'.
// NOTE: GCC just makes this an ext-warn. It's not clear what it does with
// the declarations though. It's trivial to ignore them, really hard to do
// anything else with them.
if (TryConsumeToken(tok::semi)) {
Diag(DSStart, diag::err_declaration_does_not_declare_param);
continue;
}
// C99 6.9.1p6: Declarations shall contain no storage-class specifiers other
// than register.
if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified &&
DS.getStorageClassSpec() != DeclSpec::SCS_register) {
Diag(DS.getStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl);
DS.ClearStorageClassSpecs();
}
if (DS.getThreadStorageClassSpec() != DeclSpec::TSCS_unspecified) {
Diag(DS.getThreadStorageClassSpecLoc(),
diag::err_invalid_storage_class_in_func_decl);
DS.ClearStorageClassSpecs();
}
// Parse the first declarator attached to this declspec.
Declarator ParmDeclarator(DS, Declarator::KNRTypeListContext);
ParseDeclarator(ParmDeclarator);
// Handle the full declarator list.
while (1) {
// If attributes are present, parse them.
MaybeParseGNUAttributes(ParmDeclarator);
// Ask the actions module to compute the type for this declarator.
Decl *Param =
Actions.ActOnParamDeclarator(getCurScope(), ParmDeclarator);
if (Param &&
// A missing identifier has already been diagnosed.
ParmDeclarator.getIdentifier()) {
// Scan the argument list looking for the correct param to apply this
// type.
for (unsigned i = 0; ; ++i) {
// C99 6.9.1p6: those declarators shall declare only identifiers from
// the identifier list.
if (i == FTI.NumParams) {
Diag(ParmDeclarator.getIdentifierLoc(), diag::err_no_matching_param)
<< ParmDeclarator.getIdentifier();
break;
}
if (FTI.Params[i].Ident == ParmDeclarator.getIdentifier()) {
// Reject redefinitions of parameters.
if (FTI.Params[i].Param) {
Diag(ParmDeclarator.getIdentifierLoc(),
diag::err_param_redefinition)
<< ParmDeclarator.getIdentifier();
} else {
FTI.Params[i].Param = Param;
}
break;
}
}
}
// If we don't have a comma, it is either the end of the list (a ';') or
// an error, bail out.
if (Tok.isNot(tok::comma))
break;
ParmDeclarator.clear();
// Consume the comma.
ParmDeclarator.setCommaLoc(ConsumeToken());
// Parse the next declarator.
ParseDeclarator(ParmDeclarator);
}
// Consume ';' and continue parsing.
if (!ExpectAndConsumeSemi(diag::err_expected_semi_declaration))
continue;
// Otherwise recover by skipping to next semi or mandatory function body.
if (SkipUntil(tok::l_brace, StopAtSemi | StopBeforeMatch))
break;
TryConsumeToken(tok::semi);
}
// The actions module must verify that all arguments were declared.
Actions.ActOnFinishKNRParamDeclarations(getCurScope(), D, Tok.getLocation());
}
/// ParseAsmStringLiteral - This is just a normal string-literal, but is not
/// allowed to be a wide string, and is not subject to character translation.
///
/// [GNU] asm-string-literal:
/// string-literal
///
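/// For example, in 'asm("nop")' this accepts the literal "nop", while a wide
/// literal such as L"nop" is rejected with a diagnostic.
///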
ExprResult Parser::ParseAsmStringLiteral() {
if (!isTokenStringLiteral()) {
Diag(Tok, diag::err_expected_string_literal)
<< /*Source='in...'*/0 << "'asm'";
return ExprError();
}
ExprResult AsmString(ParseStringLiteralExpression());
if (!AsmString.isInvalid()) {
const auto *SL = cast<StringLiteral>(AsmString.get());
if (!SL->isAscii()) {
Diag(Tok, diag::err_asm_operand_wide_string_literal)
<< SL->isWide()
<< SL->getSourceRange();
return ExprError();
}
}
return AsmString;
}
/// ParseSimpleAsm
///
/// [GNU] simple-asm-expr:
/// 'asm' '(' asm-string-literal ')'
///
ExprResult Parser::ParseSimpleAsm(SourceLocation *EndLoc) {
assert(Tok.is(tok::kw_asm) && "Not an asm!");
SourceLocation Loc = ConsumeToken();
if (Tok.is(tok::kw_volatile)) {
// Remove from the end of 'asm' to the end of 'volatile'.
SourceRange RemovalRange(PP.getLocForEndOfToken(Loc),
PP.getLocForEndOfToken(Tok.getLocation()));
Diag(Tok, diag::warn_file_asm_volatile)
<< FixItHint::CreateRemoval(RemovalRange);
ConsumeToken();
}
BalancedDelimiterTracker T(*this, tok::l_paren);
if (T.consumeOpen()) {
Diag(Tok, diag::err_expected_lparen_after) << "asm";
return ExprError();
}
ExprResult Result(ParseAsmStringLiteral());
if (!Result.isInvalid()) {
// Close the paren and get the location of the end bracket
T.consumeClose();
if (EndLoc)
*EndLoc = T.getCloseLocation();
} else if (SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch)) {
if (EndLoc)
*EndLoc = Tok.getLocation();
ConsumeParen();
}
return Result;
}
/// \brief Get the TemplateIdAnnotation from the token and put it in the
/// cleanup pool so that it gets destroyed when parsing the current top level
/// declaration is finished.
TemplateIdAnnotation *Parser::takeTemplateIdAnnotation(const Token &tok) {
assert(tok.is(tok::annot_template_id) && "Expected template-id token");
TemplateIdAnnotation *
Id = static_cast<TemplateIdAnnotation *>(tok.getAnnotationValue());
return Id;
}
void Parser::AnnotateScopeToken(CXXScopeSpec &SS, bool IsNewAnnotation) {
// Push the current token back into the token stream (or revert it if it is
// cached) and use an annotation scope token for current token.
if (PP.isBacktrackEnabled())
PP.RevertCachedTokens(1);
else
PP.EnterToken(Tok);
Tok.setKind(tok::annot_cxxscope);
Tok.setAnnotationValue(Actions.SaveNestedNameSpecifierAnnotation(SS));
Tok.setAnnotationRange(SS.getRange());
// In case the tokens were cached, have Preprocessor replace them
// with the annotation token. We don't need to do this if we've
// just reverted back to a prior state.
if (IsNewAnnotation)
PP.AnnotateCachedTokens(Tok);
}
/// \brief Attempt to classify the name at the current token position. This may
/// form a type, scope or primary expression annotation, or replace the token
/// with a typo-corrected keyword. This is only appropriate when the current
/// name must refer to an entity which has already been declared.
///
/// \param IsAddressOfOperand Must be \c true if the name is preceded by an '&'
/// and might possibly have a dependent nested name specifier.
/// \param CCC Indicates how to perform typo-correction for this name. If NULL,
/// no typo correction will be performed.
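/// For example, while parsing "x = 1;" inside a function, 'x' may be
/// annotated as a primary expression; in "T *p;", 'T' may be annotated as a
/// type.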
Parser::AnnotatedNameKind
Parser::TryAnnotateName(bool IsAddressOfOperand,
std::unique_ptr<CorrectionCandidateCallback> CCC) {
assert(Tok.is(tok::identifier) || Tok.is(tok::annot_cxxscope));
const bool EnteringContext = false;
const bool WasScopeAnnotation = Tok.is(tok::annot_cxxscope);
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus &&
ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return ANK_Error;
if (Tok.isNot(tok::identifier) || SS.isInvalid()) {
if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(SS, !WasScopeAnnotation))
return ANK_Error;
return ANK_Unresolved;
}
IdentifierInfo *Name = Tok.getIdentifierInfo();
SourceLocation NameLoc = Tok.getLocation();
// FIXME: Move the tentative declaration logic into ClassifyName so we can
// typo-correct to tentatively-declared identifiers.
if (isTentativelyDeclared(Name)) {
// Identifier has been tentatively declared, and thus cannot be resolved as
// an expression. Fall back to annotating it as a type.
if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(SS, !WasScopeAnnotation))
return ANK_Error;
return Tok.is(tok::annot_typename) ? ANK_Success : ANK_TentativeDecl;
}
Token Next = NextToken();
// Look up and classify the identifier. We don't perform any typo-correction
// after a scope specifier, because in general we can't recover from typos
// there (eg, after correcting 'A::tempalte B<X>::C' [sic], we would need to
// jump back into scope specifier parsing).
Sema::NameClassification Classification = Actions.ClassifyName(
getCurScope(), SS, Name, NameLoc, Next, IsAddressOfOperand,
SS.isEmpty() ? std::move(CCC) : nullptr);
switch (Classification.getKind()) {
case Sema::NC_Error:
return ANK_Error;
case Sema::NC_Keyword:
// The identifier was typo-corrected to a keyword.
Tok.setIdentifierInfo(Name);
Tok.setKind(Name->getTokenID());
PP.TypoCorrectToken(Tok);
if (SS.isNotEmpty())
AnnotateScopeToken(SS, !WasScopeAnnotation);
// We've "annotated" this as a keyword.
return ANK_Success;
case Sema::NC_Unknown:
// It's not something we know about. Leave it unannotated.
break;
case Sema::NC_Type: {
SourceLocation BeginLoc = NameLoc;
if (SS.isNotEmpty())
BeginLoc = SS.getBeginLoc();
// An Objective-C object type followed by '<' is a specialization of
// a parameterized class type or a protocol-qualified type.
ParsedType Ty = Classification.getType();
if (getLangOpts().ObjC1 && NextToken().is(tok::less) &&
(Ty.get()->isObjCObjectType() ||
Ty.get()->isObjCObjectPointerType())) {
// Consume the name.
SourceLocation IdentifierLoc = ConsumeToken();
SourceLocation NewEndLoc;
TypeResult NewType
= parseObjCTypeArgsAndProtocolQualifiers(IdentifierLoc, Ty,
/*consumeLastToken=*/false,
NewEndLoc);
if (NewType.isUsable())
Ty = NewType.get();
else if (Tok.is(tok::eof)) // Nothing to do here, bail out...
return ANK_Error;
}
Tok.setKind(tok::annot_typename);
setTypeAnnotation(Tok, Ty);
Tok.setAnnotationEndLoc(Tok.getLocation());
Tok.setLocation(BeginLoc);
PP.AnnotateCachedTokens(Tok);
return ANK_Success;
}
case Sema::NC_Expression:
Tok.setKind(tok::annot_primary_expr);
setExprAnnotation(Tok, Classification.getExpression());
Tok.setAnnotationEndLoc(NameLoc);
if (SS.isNotEmpty())
Tok.setLocation(SS.getBeginLoc());
PP.AnnotateCachedTokens(Tok);
return ANK_Success;
case Sema::NC_TypeTemplate:
if (Next.isNot(tok::less)) {
// This may be a type template being used as a template template argument.
if (SS.isNotEmpty())
AnnotateScopeToken(SS, !WasScopeAnnotation);
return ANK_TemplateName;
}
// Fall through.
case Sema::NC_VarTemplate:
case Sema::NC_FunctionTemplate: {
// We have a type, variable or function template followed by '<'.
ConsumeToken();
UnqualifiedId Id;
Id.setIdentifier(Name, NameLoc);
if (AnnotateTemplateIdToken(
TemplateTy::make(Classification.getTemplateName()),
Classification.getTemplateNameKind(), SS, SourceLocation(), Id))
return ANK_Error;
return ANK_Success;
}
case Sema::NC_NestedNameSpecifier:
llvm_unreachable("already parsed nested name specifier");
}
// Unable to classify the name, but maybe we can annotate a scope specifier.
if (SS.isNotEmpty())
AnnotateScopeToken(SS, !WasScopeAnnotation);
return ANK_Unresolved;
}
bool Parser::TryKeywordIdentFallback(bool DisableKeyword) {
assert(Tok.isNot(tok::identifier));
Diag(Tok, diag::ext_keyword_as_ident)
<< PP.getSpelling(Tok)
<< DisableKeyword;
if (DisableKeyword)
Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
Tok.setKind(tok::identifier);
return true;
}
/// TryAnnotateTypeOrScopeToken - If the current token position is on a
/// typename (possibly qualified in C++) or a C++ scope specifier not followed
/// by a typename, TryAnnotateTypeOrScopeToken will replace one or more tokens
/// with a single annotation token representing the typename or C++ scope
/// respectively.
/// This simplifies handling of C++ scope specifiers and allows efficient
/// backtracking without the need to re-parse and resolve nested-names and
/// typenames.
/// It will mainly be called when we expect to treat identifiers as typenames
/// (if they are typenames). For example, in C we do not expect identifiers
/// inside expressions to be treated as typenames so it will not be called
/// for expressions in C.
/// The benefit for C/ObjC is that a typename will be annotated and
/// Actions.getTypeName will not need to be called again (e.g. getTypeName
/// will not be called twice, once to check whether we have a declaration
/// specifier, and again to get the actual type inside
/// ParseDeclarationSpecifiers).
///
/// This returns true if an error occurred.
///
/// Note that this routine emits an error if you call it with ::new or ::delete
/// as the current tokens, so only call it in contexts where these are invalid.
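/// For example, the tokens for 'std::vector<int>' in "std::vector<int> v;"
/// can be collapsed into a single annot_typename token, so backtracking does
/// not have to repeat the qualified name lookup.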
bool Parser::TryAnnotateTypeOrScopeToken() {
assert((Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
Tok.is(tok::kw_typename) || Tok.is(tok::annot_cxxscope) ||
Tok.is(tok::kw_decltype) || Tok.is(tok::annot_template_id) ||
Tok.is(tok::kw___super)) &&
"Cannot be a type or scope token!");
if (Tok.is(tok::kw_typename)) {
// MSVC lets you do stuff like:
// typename typedef T_::D D;
//
// We will consume the typedef token here and put it back after we have
// parsed the first identifier, transforming it into something more like:
// typename T_::D typedef D;
if (getLangOpts().MSVCCompat && NextToken().is(tok::kw_typedef)) {
Token TypedefToken;
PP.Lex(TypedefToken);
bool Result = TryAnnotateTypeOrScopeToken();
PP.EnterToken(Tok);
Tok = TypedefToken;
if (!Result)
Diag(Tok.getLocation(), diag::warn_expected_qualified_after_typename);
return Result;
}
// Parse a C++ typename-specifier, e.g., "typename T::type".
//
// typename-specifier:
// 'typename' '::' [opt] nested-name-specifier identifier
// 'typename' '::' [opt] nested-name-specifier template [opt]
// simple-template-id
SourceLocation TypenameLoc = ConsumeToken();
CXXScopeSpec SS;
if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
/*EnteringContext=*/false, nullptr,
/*IsTypename*/ true))
return true;
if (!SS.isSet()) {
if (Tok.is(tok::identifier) || Tok.is(tok::annot_template_id) ||
Tok.is(tok::annot_decltype)) {
// Attempt to recover by skipping the invalid 'typename'
if (Tok.is(tok::annot_decltype) ||
(!TryAnnotateTypeOrScopeToken() && Tok.isAnnotation())) {
unsigned DiagID = diag::err_expected_qualified_after_typename;
// MS compatibility: MSVC permits using known types with typename.
// e.g. "typedef typename T* pointer_type"
if (getLangOpts().MicrosoftExt)
DiagID = diag::warn_expected_qualified_after_typename;
Diag(Tok.getLocation(), DiagID);
return false;
}
}
if (Tok.isEditorPlaceholder())
return true;
Diag(Tok.getLocation(), diag::err_expected_qualified_after_typename);
return true;
}
TypeResult Ty;
if (Tok.is(tok::identifier)) {
// FIXME: check whether the next token is '<', first!
Ty = Actions.ActOnTypenameType(getCurScope(), TypenameLoc, SS,
*Tok.getIdentifierInfo(),
Tok.getLocation());
} else if (Tok.is(tok::annot_template_id)) {
TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
if (TemplateId->Kind != TNK_Type_template &&
TemplateId->Kind != TNK_Dependent_template_name) {
Diag(Tok, diag::err_typename_refers_to_non_type_template)
<< Tok.getAnnotationRange();
return true;
}
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
Ty = Actions.ActOnTypenameType(getCurScope(), TypenameLoc, SS,
TemplateId->TemplateKWLoc,
TemplateId->Template,
TemplateId->Name,
TemplateId->TemplateNameLoc,
TemplateId->LAngleLoc,
TemplateArgsPtr,
TemplateId->RAngleLoc);
} else {
Diag(Tok, diag::err_expected_type_name_after_typename)
<< SS.getRange();
return true;
}
SourceLocation EndLoc = Tok.getLastLoc();
Tok.setKind(tok::annot_typename);
setTypeAnnotation(Tok, Ty.isInvalid() ? nullptr : Ty.get());
Tok.setAnnotationEndLoc(EndLoc);
Tok.setLocation(TypenameLoc);
PP.AnnotateCachedTokens(Tok);
return false;
}
// Remembers whether the token was originally a scope annotation.
bool WasScopeAnnotation = Tok.is(tok::annot_cxxscope);
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus)
if (ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext*/false))
return true;
return TryAnnotateTypeOrScopeTokenAfterScopeSpec(SS, !WasScopeAnnotation);
}
/// \brief Try to annotate a type or scope token, having already parsed an
/// optional scope specifier. \p IsNewScope should be \c true unless the scope
/// specifier was extracted from an existing tok::annot_cxxscope annotation.
bool Parser::TryAnnotateTypeOrScopeTokenAfterScopeSpec(CXXScopeSpec &SS,
bool IsNewScope) {
if (Tok.is(tok::identifier)) {
// Determine whether the identifier is a type name.
if (ParsedType Ty = Actions.getTypeName(
*Tok.getIdentifierInfo(), Tok.getLocation(), getCurScope(), &SS,
false, NextToken().is(tok::period), nullptr,
/*IsCtorOrDtorName=*/false,
/*NonTrivialTypeSourceInfo*/ true,
/*IsClassTemplateDeductionContext*/GreaterThanIsOperator)) {
SourceLocation BeginLoc = Tok.getLocation();
if (SS.isNotEmpty()) // it was a C++ qualified type name.
BeginLoc = SS.getBeginLoc();
// An Objective-C object type followed by '<' is a specialization of
// a parameterized class type or a protocol-qualified type.
if (getLangOpts().ObjC1 && NextToken().is(tok::less) &&
(Ty.get()->isObjCObjectType() ||
Ty.get()->isObjCObjectPointerType())) {
// Consume the name.
SourceLocation IdentifierLoc = ConsumeToken();
SourceLocation NewEndLoc;
TypeResult NewType
= parseObjCTypeArgsAndProtocolQualifiers(IdentifierLoc, Ty,
/*consumeLastToken=*/false,
NewEndLoc);
if (NewType.isUsable())
Ty = NewType.get();
else if (Tok.is(tok::eof)) // Nothing to do here, bail out...
return false;
}
// This is a typename. Replace the current token in-place with an
// annotation type token.
Tok.setKind(tok::annot_typename);
setTypeAnnotation(Tok, Ty);
Tok.setAnnotationEndLoc(Tok.getLocation());
Tok.setLocation(BeginLoc);
// In case the tokens were cached, have Preprocessor replace
// them with the annotation token.
PP.AnnotateCachedTokens(Tok);
return false;
}
if (!getLangOpts().CPlusPlus) {
// If we're in C, we can't have :: tokens at all (the lexer won't return
// them). If the identifier is not a type, then it can't be a scope either;
// just exit early.
return false;
}
// If this is a template-id, annotate with a template-id or type token.
if (NextToken().is(tok::less)) {
TemplateTy Template;
UnqualifiedId TemplateName;
TemplateName.setIdentifier(Tok.getIdentifierInfo(), Tok.getLocation());
bool MemberOfUnknownSpecialization;
if (TemplateNameKind TNK = Actions.isTemplateName(
getCurScope(), SS,
/*hasTemplateKeyword=*/false, TemplateName,
/*ObjectType=*/nullptr, /*EnteringContext*/false, Template,
MemberOfUnknownSpecialization)) {
// Consume the identifier.
ConsumeToken();
if (AnnotateTemplateIdToken(Template, TNK, SS, SourceLocation(),
TemplateName)) {
// If an unrecoverable error occurred, we need to return true here,
// because the token stream is in a damaged state. We may not return
// a valid identifier.
return true;
}
}
}
// The current token, which is either an identifier or a
// template-id, is not part of the annotation. Fall through to
// push that token back into the stream and complete the C++ scope
// specifier annotation.
}
if (Tok.is(tok::annot_template_id)) {
TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
if (TemplateId->Kind == TNK_Type_template) {
// A template-id that refers to a type was parsed into a
// template-id annotation in a context where we weren't allowed
// to produce a type annotation token. Update the template-id
// annotation token to a type annotation token now.
AnnotateTemplateIdTokenAsType();
return false;
}
}
if (SS.isEmpty())
return false;
// A C++ scope specifier that isn't followed by a typename.
AnnotateScopeToken(SS, IsNewScope);
return false;
}
/// TryAnnotateCXXScopeToken - Like TryAnnotateTypeOrScopeToken but only
/// annotates C++ scope specifiers and template-ids. This returns
/// true if there was an error that could not be recovered from.
///
/// Note that this routine emits an error if you call it with ::new or ::delete
/// as the current tokens, so only call it in contexts where these are invalid.
bool Parser::TryAnnotateCXXScopeToken(bool EnteringContext) {
assert(getLangOpts().CPlusPlus &&
"Call sites of this function should be guarded by checking for C++");
assert((Tok.is(tok::identifier) || Tok.is(tok::coloncolon) ||
(Tok.is(tok::annot_template_id) && NextToken().is(tok::coloncolon)) ||
Tok.is(tok::kw_decltype) || Tok.is(tok::kw___super)) &&
"Cannot be a type or scope token!");
CXXScopeSpec SS;
if (ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return true;
if (SS.isEmpty())
return false;
AnnotateScopeToken(SS, true);
return false;
}
bool Parser::isTokenEqualOrEqualTypo() {
tok::TokenKind Kind = Tok.getKind();
switch (Kind) {
default:
return false;
case tok::ampequal: // &=
case tok::starequal: // *=
case tok::plusequal: // +=
case tok::minusequal: // -=
case tok::exclaimequal: // !=
case tok::slashequal: // /=
case tok::percentequal: // %=
case tok::lessequal: // <=
case tok::lesslessequal: // <<=
case tok::greaterequal: // >=
case tok::greatergreaterequal: // >>=
case tok::caretequal: // ^=
case tok::pipeequal: // |=
case tok::equalequal: // ==
Diag(Tok, diag::err_invalid_token_after_declarator_suggest_equal)
<< Kind
<< FixItHint::CreateReplacement(SourceRange(Tok.getLocation()), "=");
LLVM_FALLTHROUGH;
case tok::equal:
return true;
}
}
SourceLocation Parser::handleUnexpectedCodeCompletionToken() {
assert(Tok.is(tok::code_completion));
PrevTokLocation = Tok.getLocation();
for (Scope *S = getCurScope(); S; S = S->getParent()) {
if (S->getFlags() & Scope::FnScope) {
Actions.CodeCompleteOrdinaryName(getCurScope(),
Sema::PCC_RecoveryInFunction);
cutOffParsing();
return PrevTokLocation;
}
if (S->getFlags() & Scope::ClassScope) {
Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Class);
cutOffParsing();
return PrevTokLocation;
}
}
Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Namespace);
cutOffParsing();
return PrevTokLocation;
}
// Code-completion pass-through functions
void Parser::CodeCompleteDirective(bool InConditional) {
Actions.CodeCompletePreprocessorDirective(InConditional);
}
void Parser::CodeCompleteInConditionalExclusion() {
Actions.CodeCompleteInPreprocessorConditionalExclusion(getCurScope());
}
void Parser::CodeCompleteMacroName(bool IsDefinition) {
Actions.CodeCompletePreprocessorMacroName(IsDefinition);
}
void Parser::CodeCompletePreprocessorExpression() {
Actions.CodeCompletePreprocessorExpression();
}
void Parser::CodeCompleteMacroArgument(IdentifierInfo *Macro,
MacroInfo *MacroInfo,
unsigned ArgumentIndex) {
Actions.CodeCompletePreprocessorMacroArgument(getCurScope(), Macro, MacroInfo,
ArgumentIndex);
}
void Parser::CodeCompleteNaturalLanguage() {
Actions.CodeCompleteNaturalLanguage();
}
bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) {
assert((Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists)) &&
"Expected '__if_exists' or '__if_not_exists'");
Result.IsIfExists = Tok.is(tok::kw___if_exists);
Result.KeywordLoc = ConsumeToken();
BalancedDelimiterTracker T(*this, tok::l_paren);
if (T.consumeOpen()) {
Diag(Tok, diag::err_expected_lparen_after)
<< (Result.IsIfExists? "__if_exists" : "__if_not_exists");
return true;
}
// Parse nested-name-specifier.
if (getLangOpts().CPlusPlus)
ParseOptionalCXXScopeSpecifier(Result.SS, nullptr,
/*EnteringContext=*/false);
// Check nested-name specifier.
if (Result.SS.isInvalid()) {
T.skipToEnd();
return true;
}
// Parse the unqualified-id.
SourceLocation TemplateKWLoc; // FIXME: parsed, but unused.
if (ParseUnqualifiedId(
Result.SS, /*EnteringContext*/false, /*AllowDestructorName*/true,
/*AllowConstructorName*/true, /*AllowDeductionGuide*/false, nullptr,
TemplateKWLoc, Result.Name)) {
T.skipToEnd();
return true;
}
if (T.consumeClose())
return true;
// Check if the symbol exists.
switch (Actions.CheckMicrosoftIfExistsSymbol(getCurScope(), Result.KeywordLoc,
Result.IsIfExists, Result.SS,
Result.Name)) {
case Sema::IER_Exists:
Result.Behavior = Result.IsIfExists ? IEB_Parse : IEB_Skip;
break;
case Sema::IER_DoesNotExist:
Result.Behavior = !Result.IsIfExists ? IEB_Parse : IEB_Skip;
break;
case Sema::IER_Dependent:
Result.Behavior = IEB_Dependent;
break;
case Sema::IER_Error:
return true;
}
return false;
}
void Parser::ParseMicrosoftIfExistsExternalDeclaration() {
IfExistsCondition Result;
if (ParseMicrosoftIfExistsCondition(Result))
return;
BalancedDelimiterTracker Braces(*this, tok::l_brace);
if (Braces.consumeOpen()) {
Diag(Tok, diag::err_expected) << tok::l_brace;
return;
}
switch (Result.Behavior) {
case IEB_Parse:
// Parse declarations below.
break;
case IEB_Dependent:
llvm_unreachable("Cannot have a dependent external declaration");
case IEB_Skip:
Braces.skipToEnd();
return;
}
// Parse the declarations.
// FIXME: Support module import within __if_exists?
while (Tok.isNot(tok::r_brace) && !isEofOrEom()) {
ParsedAttributesWithRange attrs(AttrFactory);
MaybeParseCXX11Attributes(attrs);
DeclGroupPtrTy Result = ParseExternalDeclaration(attrs);
if (Result && !getCurScope()->getParent())
Actions.getASTConsumer().HandleTopLevelDecl(Result.get());
}
Braces.consumeClose();
}
/// Parse a C++ Modules TS module declaration, which appears at the beginning
/// of a module interface, module partition, or module implementation file.
///
/// module-declaration: [Modules TS + P0273R0 + P0629R0]
/// 'export'[opt] 'module' 'partition'[opt]
/// module-name attribute-specifier-seq[opt] ';'
///
/// Note that 'partition' is a context-sensitive keyword.
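/// For example:
/// export module foo.bar; // module interface unit
/// module foo.bar; // module implementation unit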
Parser::DeclGroupPtrTy Parser::ParseModuleDecl() {
SourceLocation StartLoc = Tok.getLocation();
Sema::ModuleDeclKind MDK = TryConsumeToken(tok::kw_export)
? Sema::ModuleDeclKind::Module
: Sema::ModuleDeclKind::Implementation;
assert(Tok.is(tok::kw_module) && "not a module declaration");
SourceLocation ModuleLoc = ConsumeToken();
if (Tok.is(tok::identifier) && NextToken().is(tok::identifier) &&
Tok.getIdentifierInfo()->isStr("partition")) {
// If 'partition' is present, this must be a module interface unit.
if (MDK != Sema::ModuleDeclKind::Module)
Diag(Tok.getLocation(), diag::err_module_implementation_partition)
<< FixItHint::CreateInsertion(ModuleLoc, "export ");
MDK = Sema::ModuleDeclKind::Partition;
ConsumeToken();
}
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
if (ParseModuleName(ModuleLoc, Path, /*IsImport*/false))
return nullptr;
// We don't support any module attributes yet; just parse them and diagnose.
ParsedAttributesWithRange Attrs(AttrFactory);
MaybeParseCXX11Attributes(Attrs);
ProhibitCXX11Attributes(Attrs, diag::err_attribute_not_module_attr);
ExpectAndConsumeSemi(diag::err_module_expected_semi);
return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path);
}
/// Parse a module import declaration. This is essentially the same for
/// Objective-C and the C++ Modules TS, except for the leading '@' (in ObjC)
/// and the trailing optional attributes (in C++).
///
/// [ObjC] @import declaration:
/// '@' 'import' module-name ';'
/// [ModTS] module-import-declaration:
/// 'import' module-name attribute-specifier-seq[opt] ';'
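/// For example: "@import Foundation;" in Objective-C, or
/// "import std.core;" under the Modules TS.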
Parser::DeclGroupPtrTy Parser::ParseModuleImport(SourceLocation AtLoc) {
assert((AtLoc.isInvalid() ? Tok.is(tok::kw_import)
: Tok.isObjCAtKeyword(tok::objc_import)) &&
"Improper start to module import");
SourceLocation ImportLoc = ConsumeToken();
SourceLocation StartLoc = AtLoc.isInvalid() ? ImportLoc : AtLoc;
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
if (ParseModuleName(ImportLoc, Path, /*IsImport*/true))
return nullptr;
ParsedAttributesWithRange Attrs(AttrFactory);
MaybeParseCXX11Attributes(Attrs);
// We don't support any module import attributes yet.
ProhibitCXX11Attributes(Attrs, diag::err_attribute_not_import_attr);
if (PP.hadModuleLoaderFatalFailure()) {
// With a fatal failure in the module loader, we abort parsing.
cutOffParsing();
return nullptr;
}
DeclResult Import = Actions.ActOnModuleImport(StartLoc, ImportLoc, Path);
ExpectAndConsumeSemi(diag::err_module_expected_semi);
if (Import.isInvalid())
return nullptr;
return Actions.ConvertDeclToDeclGroup(Import.get());
}
/// Parse a C++ Modules TS / Objective-C module name (both forms use the same
/// grammar).
///
/// module-name:
/// module-name-qualifier[opt] identifier
/// module-name-qualifier:
/// module-name-qualifier[opt] identifier '.'
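/// For example, "foo.bar.baz" is recorded as the path {foo, bar, baz}.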
bool Parser::ParseModuleName(
SourceLocation UseLoc,
SmallVectorImpl<std::pair<IdentifierInfo *, SourceLocation>> &Path,
bool IsImport) {
// Parse the module path.
while (true) {
if (!Tok.is(tok::identifier)) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteModuleImport(UseLoc, Path);
cutOffParsing();
return true;
}
Diag(Tok, diag::err_module_expected_ident) << IsImport;
SkipUntil(tok::semi);
return true;
}
// Record this part of the module path.
Path.push_back(std::make_pair(Tok.getIdentifierInfo(), Tok.getLocation()));
ConsumeToken();
if (Tok.isNot(tok::period))
return false;
ConsumeToken();
}
}
/// \brief Try to recover the parser when a module annotation appears where it
/// must not appear.
/// \returns false if the recovery was successful and parsing may continue, or
/// true if the parser must bail out to the top level and handle the token
/// there.
bool Parser::parseMisplacedModuleImport() {
while (true) {
switch (Tok.getKind()) {
case tok::annot_module_end:
// If we recovered from a misplaced module begin, we expect to hit a
// misplaced module end too. Stay in the current context when this
// happens.
if (MisplacedModuleBeginCount) {
--MisplacedModuleBeginCount;
Actions.ActOnModuleEnd(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
continue;
}
// Inform the caller that recovery failed; the error must be handled at an
// upper level. This will generate the desired "missing '}' at end of module"
// diagnostics on the way out.
return true;
case tok::annot_module_begin:
// Recover by entering the module (Sema will diagnose).
Actions.ActOnModuleBegin(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
++MisplacedModuleBeginCount;
continue;
case tok::annot_module_include:
// Module import found where it should not be, for instance, inside a
// namespace. Recover by importing the module.
Actions.ActOnModuleInclude(Tok.getLocation(),
reinterpret_cast<Module *>(
Tok.getAnnotationValue()));
ConsumeAnnotationToken();
// If there is another module import, process it.
continue;
default:
return false;
}
}
return false;
}
bool BalancedDelimiterTracker::diagnoseOverflow() {
P.Diag(P.Tok, diag::err_bracket_depth_exceeded)
<< P.getLangOpts().BracketDepth;
P.Diag(P.Tok, diag::note_bracket_depth);
P.cutOffParsing();
return true;
}
bool BalancedDelimiterTracker::expectAndConsume(unsigned DiagID,
const char *Msg,
tok::TokenKind SkipToTok) {
LOpen = P.Tok.getLocation();
if (P.ExpectAndConsume(Kind, DiagID, Msg)) {
if (SkipToTok != tok::unknown)
P.SkipUntil(SkipToTok, Parser::StopAtSemi);
return true;
}
if (getDepth() < MaxDepth)
return false;
return diagnoseOverflow();
}
bool BalancedDelimiterTracker::diagnoseMissingClose() {
assert(!P.Tok.is(Close) && "Should have consumed closing delimiter");
if (P.Tok.is(tok::annot_module_end))
P.Diag(P.Tok, diag::err_missing_before_module_end) << Close;
else
P.Diag(P.Tok, diag::err_expected) << Close;
P.Diag(LOpen, diag::note_matching) << Kind;
// If we're not already at some kind of closing bracket, skip to our closing
// token.
if (P.Tok.isNot(tok::r_paren) && P.Tok.isNot(tok::r_brace) &&
P.Tok.isNot(tok::r_square) &&
P.SkipUntil(Close, FinalToken,
Parser::StopAtSemi | Parser::StopBeforeMatch) &&
P.Tok.is(Close))
LClose = P.ConsumeAnyToken();
return true;
}
void BalancedDelimiterTracker::skipToEnd() {
P.SkipUntil(Close, Parser::StopBeforeMatch);
consumeClose();
}
Index: head/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp (revision 322855)
@@ -1,14920 +1,14970 @@
//===------ SemaDeclCXX.cpp - Semantic Analysis for C++ Declarations ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for C++ declarations.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/CXXFieldCollector.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include <map>
#include <set>
using namespace clang;
//===----------------------------------------------------------------------===//
// CheckDefaultArgumentVisitor
//===----------------------------------------------------------------------===//
namespace {
/// CheckDefaultArgumentVisitor - C++ [dcl.fct.default] Traverses
/// the default argument of a parameter to determine whether it
/// contains any ill-formed subexpressions. For example, this will
/// diagnose the use of local variables or parameters within the
/// default argument expression.
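/// For example, both of these are rejected:
/// int f(int a, int b = a); // refers to the parameter 'a'
/// void g() { int n; extern void h(int = n); } // refers to the local 'n'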
class CheckDefaultArgumentVisitor
: public StmtVisitor<CheckDefaultArgumentVisitor, bool> {
Expr *DefaultArg;
Sema *S;
public:
CheckDefaultArgumentVisitor(Expr *defarg, Sema *s)
: DefaultArg(defarg), S(s) {}
bool VisitExpr(Expr *Node);
bool VisitDeclRefExpr(DeclRefExpr *DRE);
bool VisitCXXThisExpr(CXXThisExpr *ThisE);
bool VisitLambdaExpr(LambdaExpr *Lambda);
bool VisitPseudoObjectExpr(PseudoObjectExpr *POE);
};
/// VisitExpr - Visit all of the children of this expression.
bool CheckDefaultArgumentVisitor::VisitExpr(Expr *Node) {
bool IsInvalid = false;
for (Stmt *SubStmt : Node->children())
IsInvalid |= Visit(SubStmt);
return IsInvalid;
}
/// VisitDeclRefExpr - Visit a reference to a declaration, to
/// determine whether this declaration can be used in the default
/// argument expression.
bool CheckDefaultArgumentVisitor::VisitDeclRefExpr(DeclRefExpr *DRE) {
NamedDecl *Decl = DRE->getDecl();
if (ParmVarDecl *Param = dyn_cast<ParmVarDecl>(Decl)) {
// C++ [dcl.fct.default]p9
// Default arguments are evaluated each time the function is
// called. The order of evaluation of function arguments is
// unspecified. Consequently, parameters of a function shall not
// be used in default argument expressions, even if they are not
// evaluated. Parameters of a function declared before a default
// argument expression are in scope and can hide namespace and
// class member names.
return S->Diag(DRE->getLocStart(),
diag::err_param_default_argument_references_param)
<< Param->getDeclName() << DefaultArg->getSourceRange();
} else if (VarDecl *VDecl = dyn_cast<VarDecl>(Decl)) {
// C++ [dcl.fct.default]p7
// Local variables shall not be used in default argument
// expressions.
if (VDecl->isLocalVarDecl())
return S->Diag(DRE->getLocStart(),
diag::err_param_default_argument_references_local)
<< VDecl->getDeclName() << DefaultArg->getSourceRange();
}
return false;
}
/// VisitCXXThisExpr - Visit a C++ "this" expression.
bool CheckDefaultArgumentVisitor::VisitCXXThisExpr(CXXThisExpr *ThisE) {
// C++ [dcl.fct.default]p8:
// The keyword this shall not be used in a default argument of a
// member function.
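// For example: struct S { int x; void f(int i = this->x); }; // ill-formed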
return S->Diag(ThisE->getLocStart(),
diag::err_param_default_argument_references_this)
<< ThisE->getSourceRange();
}
bool CheckDefaultArgumentVisitor::VisitPseudoObjectExpr(PseudoObjectExpr *POE) {
bool Invalid = false;
for (PseudoObjectExpr::semantics_iterator
i = POE->semantics_begin(), e = POE->semantics_end(); i != e; ++i) {
Expr *E = *i;
// Look through bindings.
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
E = OVE->getSourceExpr();
assert(E && "pseudo-object binding without source expression?");
}
Invalid |= Visit(E);
}
return Invalid;
}
bool CheckDefaultArgumentVisitor::VisitLambdaExpr(LambdaExpr *Lambda) {
// C++11 [expr.lambda.prim]p13:
// A lambda-expression appearing in a default argument shall not
// implicitly or explicitly capture any entity.
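// For example: void f(int i, int j = [i]{ return i; }()); // ill-formed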
if (Lambda->capture_begin() == Lambda->capture_end())
return false;
return S->Diag(Lambda->getLocStart(),
diag::err_lambda_capture_default_arg);
}
}
void
Sema::ImplicitExceptionSpecification::CalledDecl(SourceLocation CallLoc,
const CXXMethodDecl *Method) {
// If we have an MSAny spec already, don't bother.
if (!Method || ComputedEST == EST_MSAny)
return;
const FunctionProtoType *Proto
= Method->getType()->getAs<FunctionProtoType>();
Proto = Self->ResolveExceptionSpec(CallLoc, Proto);
if (!Proto)
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
// If we have a throw-all spec at this point, ignore the function.
if (ComputedEST == EST_None)
return;
switch(EST) {
// If this function can throw any exceptions, make a note of that.
case EST_MSAny:
case EST_None:
ClearExceptions();
ComputedEST = EST;
return;
// FIXME: If the call to this decl is using any of its default arguments, we
// need to search them for potentially-throwing calls.
// If this function has a basic noexcept, it doesn't affect the outcome.
case EST_BasicNoexcept:
return;
// If we're still at noexcept(true) and there's a nothrow() callee,
// change to that specification.
case EST_DynamicNone:
if (ComputedEST == EST_BasicNoexcept)
ComputedEST = EST_DynamicNone;
return;
// Check out noexcept specs.
case EST_ComputedNoexcept:
{
FunctionProtoType::NoexceptResult NR =
Proto->getNoexceptSpec(Self->Context);
assert(NR != FunctionProtoType::NR_NoNoexcept &&
"Must have noexcept result for EST_ComputedNoexcept.");
assert(NR != FunctionProtoType::NR_Dependent &&
"Should not generate implicit declarations for dependent cases, "
"and don't know how to handle them anyway.");
// noexcept(false) -> no spec on the new function
if (NR == FunctionProtoType::NR_Throw) {
ClearExceptions();
ComputedEST = EST_None;
}
// noexcept(true) won't change anything either.
return;
}
default:
break;
}
assert(EST == EST_Dynamic && "EST case not considered earlier.");
assert(ComputedEST != EST_None &&
"Shouldn't collect exceptions when throw-all is guaranteed.");
ComputedEST = EST_Dynamic;
// Record the exceptions in this function's exception specification.
for (const auto &E : Proto->exceptions())
if (ExceptionsSeen.insert(Self->Context.getCanonicalType(E)).second)
Exceptions.push_back(E);
}
void Sema::ImplicitExceptionSpecification::CalledExpr(Expr *E) {
if (!E || ComputedEST == EST_MSAny)
return;
// FIXME:
//
// C++0x [except.spec]p14:
// [An] implicit exception-specification specifies the type-id T if and
// only if T is allowed by the exception-specification of a function directly
// invoked by f's implicit definition; f shall allow all exceptions if any
// function it directly invokes allows all exceptions, and f shall allow no
// exceptions if every function it directly invokes allows no exceptions.
//
// Note in particular that if an implicit exception-specification is generated
// for a function containing a throw-expression, that specification can still
// be noexcept(true).
//
// Note also that 'directly invoked' is not defined in the standard, and there
// is no indication that we should only consider potentially-evaluated calls.
//
// Ultimately we should implement the intent of the standard: the exception
// specification should be the set of exceptions which can be thrown by the
// implicit definition. For now, we assume that any non-nothrow expression can
// throw any exception.
if (Self->canThrow(E))
ComputedEST = EST_None;
}
bool
Sema::SetParamDefaultArgument(ParmVarDecl *Param, Expr *Arg,
SourceLocation EqualLoc) {
if (RequireCompleteType(Param->getLocation(), Param->getType(),
diag::err_typecheck_decl_incomplete_type)) {
Param->setInvalidDecl();
return true;
}
// C++ [dcl.fct.default]p5
// A default argument expression is implicitly converted (clause
// 4) to the parameter type. The default argument expression has
// the same semantic constraints as the initializer expression in
// a declaration of a variable of the parameter type, using the
// copy-initialization semantics (8.5).
InitializedEntity Entity = InitializedEntity::InitializeParameter(Context,
Param);
InitializationKind Kind = InitializationKind::CreateCopy(Param->getLocation(),
EqualLoc);
InitializationSequence InitSeq(*this, Entity, Kind, Arg);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Arg);
if (Result.isInvalid())
return true;
Arg = Result.getAs<Expr>();
CheckCompletedExpr(Arg, EqualLoc);
Arg = MaybeCreateExprWithCleanups(Arg);
// Okay: add the default argument to the parameter
Param->setDefaultArg(Arg);
// We have already instantiated this parameter; provide each of the
// instantiations with the uninstantiated default argument.
UnparsedDefaultArgInstantiationsMap::iterator InstPos
= UnparsedDefaultArgInstantiations.find(Param);
if (InstPos != UnparsedDefaultArgInstantiations.end()) {
for (unsigned I = 0, N = InstPos->second.size(); I != N; ++I)
InstPos->second[I]->setUninstantiatedDefaultArg(Arg);
// We're done tracking this parameter's instantiations.
UnparsedDefaultArgInstantiations.erase(InstPos);
}
return false;
}
/// ActOnParamDefaultArgument - Check whether the default argument
/// provided for a function parameter is well-formed. If so, attach it
/// to the parameter declaration.
void
Sema::ActOnParamDefaultArgument(Decl *param, SourceLocation EqualLoc,
Expr *DefaultArg) {
if (!param || !DefaultArg)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(param);
UnparsedDefaultArgLocs.erase(Param);
// Default arguments are only permitted in C++
if (!getLangOpts().CPlusPlus) {
Diag(EqualLoc, diag::err_param_default_argument)
<< DefaultArg->getSourceRange();
Param->setInvalidDecl();
return;
}
// Check for unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(DefaultArg, UPPC_DefaultArgument)) {
Param->setInvalidDecl();
return;
}
// C++11 [dcl.fct.default]p3
// A default argument expression [...] shall not be specified for a
// parameter pack.
if (Param->isParameterPack()) {
Diag(EqualLoc, diag::err_param_default_argument_on_parameter_pack)
<< DefaultArg->getSourceRange();
return;
}
// Check that the default argument is well-formed
CheckDefaultArgumentVisitor DefaultArgChecker(DefaultArg, this);
if (DefaultArgChecker.Visit(DefaultArg)) {
Param->setInvalidDecl();
return;
}
SetParamDefaultArgument(Param, DefaultArg, EqualLoc);
}
/// ActOnParamUnparsedDefaultArgument - We've seen a default
/// argument for a function parameter, but we can't parse it yet
/// because we're inside a class definition. Note that this default
/// argument will be parsed later.
void Sema::ActOnParamUnparsedDefaultArgument(Decl *param,
SourceLocation EqualLoc,
SourceLocation ArgLoc) {
if (!param)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(param);
Param->setUnparsedDefaultArg();
UnparsedDefaultArgLocs[Param] = ArgLoc;
}
/// ActOnParamDefaultArgumentError - Parsing or semantic analysis of
/// the default argument for the parameter param failed.
void Sema::ActOnParamDefaultArgumentError(Decl *param,
SourceLocation EqualLoc) {
if (!param)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(param);
Param->setInvalidDecl();
UnparsedDefaultArgLocs.erase(Param);
Param->setDefaultArg(new(Context)
OpaqueValueExpr(EqualLoc,
Param->getType().getNonReferenceType(),
VK_RValue));
}
/// CheckExtraCXXDefaultArguments - Check for any extra default
/// arguments in the declarator, which is not a function declaration
/// or definition and therefore is not permitted to have default
/// arguments. This routine should be invoked for every declarator
/// that is not a function declaration or definition.
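/// For example, the default argument here is diagnosed and dropped:
/// int (*pf)(int = 7); // 'pf' declares a variable, not a function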
void Sema::CheckExtraCXXDefaultArguments(Declarator &D) {
// C++ [dcl.fct.default]p3
// A default argument expression shall be specified only in the
// parameter-declaration-clause of a function declaration or in a
// template-parameter (14.1). It shall not be specified for a
// parameter pack. If it is specified in a
// parameter-declaration-clause, it shall not occur within a
// declarator or abstract-declarator of a parameter-declaration.
bool MightBeFunction = D.isFunctionDeclarationContext();
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = D.getTypeObject(i);
if (chunk.Kind == DeclaratorChunk::Function) {
if (MightBeFunction) {
// This is a function declaration. It can have default arguments, but
// keep looking in case its return type is a function type with default
// arguments.
MightBeFunction = false;
continue;
}
for (unsigned argIdx = 0, e = chunk.Fun.NumParams; argIdx != e;
++argIdx) {
ParmVarDecl *Param = cast<ParmVarDecl>(chunk.Fun.Params[argIdx].Param);
if (Param->hasUnparsedDefaultArg()) {
std::unique_ptr<CachedTokens> Toks =
std::move(chunk.Fun.Params[argIdx].DefaultArgTokens);
SourceRange SR;
if (Toks->size() > 1)
SR = SourceRange((*Toks)[1].getLocation(),
Toks->back().getLocation());
else
SR = UnparsedDefaultArgLocs[Param];
Diag(Param->getLocation(), diag::err_param_default_argument_nonfunc)
<< SR;
} else if (Param->getDefaultArg()) {
Diag(Param->getLocation(), diag::err_param_default_argument_nonfunc)
<< Param->getDefaultArg()->getSourceRange();
Param->setDefaultArg(nullptr);
}
}
} else if (chunk.Kind != DeclaratorChunk::Paren) {
MightBeFunction = false;
}
}
}
static bool functionDeclHasDefaultArgument(const FunctionDecl *FD) {
for (unsigned NumParams = FD->getNumParams(); NumParams > 0; --NumParams) {
const ParmVarDecl *PVD = FD->getParamDecl(NumParams-1);
if (!PVD->hasDefaultArg())
return false;
if (!PVD->hasInheritedDefaultArg())
return true;
}
return false;
}
/// MergeCXXFunctionDecl - Merge two declarations of the same C++
/// function, once we already know that they have the same
/// type. Subroutine of MergeFunctionDecl. Returns true if there was an
/// error, false otherwise.
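/// For example, default arguments accumulate across redeclarations in the
/// same scope:
/// void f(int, int = 2);
/// void f(int = 1, int); // OK, f() is now callable with no arguments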
bool Sema::MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old,
Scope *S) {
bool Invalid = false;
// The declaration context corresponding to the scope is the semantic
// parent, unless this is a local function declaration, in which case
// it is that surrounding function.
DeclContext *ScopeDC = New->isLocalExternDecl()
? New->getLexicalDeclContext()
: New->getDeclContext();
// Find the previous declaration for the purpose of default arguments.
FunctionDecl *PrevForDefaultArgs = Old;
for (/**/; PrevForDefaultArgs;
// Don't bother looking back past the latest decl if this is a local
// extern declaration; nothing else could work.
PrevForDefaultArgs = New->isLocalExternDecl()
? nullptr
: PrevForDefaultArgs->getPreviousDecl()) {
// Ignore hidden declarations.
if (!LookupResult::isVisible(*this, PrevForDefaultArgs))
continue;
if (S && !isDeclInScope(PrevForDefaultArgs, ScopeDC, S) &&
!New->isCXXClassMember()) {
// Ignore default arguments of old decl if they are not in
// the same scope and this is not an out-of-line definition of
// a member function.
continue;
}
if (PrevForDefaultArgs->isLocalExternDecl() != New->isLocalExternDecl()) {
// If only one of these is a local function declaration, then they are
// declared in different scopes, even though isDeclInScope may think
// they're in the same scope. (If both are local, the scope check is
// sufficient, and if neither is local, then they are in the same scope.)
continue;
}
// We found the right previous declaration.
break;
}
// C++ [dcl.fct.default]p4:
// For non-template functions, default arguments can be added in
// later declarations of a function in the same
// scope. Declarations in different scopes have completely
// distinct sets of default arguments. That is, declarations in
// inner scopes do not acquire default arguments from
// declarations in outer scopes, and vice versa. In a given
// function declaration, all parameters subsequent to a
// parameter with a default argument shall have default
// arguments supplied in this or previous declarations. A
// default argument shall not be redefined by a later
// declaration (not even to the same value).
//
// C++ [dcl.fct.default]p6:
// Except for member functions of class templates, the default arguments
// in a member function definition that appears outside of the class
// definition are added to the set of default arguments provided by the
// member function declaration in the class definition.
for (unsigned p = 0, NumParams = PrevForDefaultArgs
? PrevForDefaultArgs->getNumParams()
: 0;
p < NumParams; ++p) {
ParmVarDecl *OldParam = PrevForDefaultArgs->getParamDecl(p);
ParmVarDecl *NewParam = New->getParamDecl(p);
bool OldParamHasDfl = OldParam ? OldParam->hasDefaultArg() : false;
bool NewParamHasDfl = NewParam->hasDefaultArg();
if (OldParamHasDfl && NewParamHasDfl) {
unsigned DiagDefaultParamID =
diag::err_param_default_argument_redefinition;
// MSVC allows default parameters to be redefined for member functions
// of class templates. The new default parameter's value is ignored.
Invalid = true;
if (getLangOpts().MicrosoftExt) {
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(New);
if (MD && MD->getParent()->getDescribedClassTemplate()) {
// Merge the old default argument into the new parameter.
NewParam->setHasInheritedDefaultArg();
if (OldParam->hasUninstantiatedDefaultArg())
NewParam->setUninstantiatedDefaultArg(
OldParam->getUninstantiatedDefaultArg());
else
NewParam->setDefaultArg(OldParam->getInit());
DiagDefaultParamID = diag::ext_param_default_argument_redefinition;
Invalid = false;
}
}
// FIXME: If we knew where the '=' was, we could easily provide a fix-it
// hint here. Alternatively, we could walk the type-source information
// for NewParam to find the last source location in the type... but it
// isn't worth the effort right now. This is the kind of test case that
// is hard to get right:
// int f(int);
// void g(int (*fp)(int) = f);
// void g(int (*fp)(int) = &f);
Diag(NewParam->getLocation(), DiagDefaultParamID)
<< NewParam->getDefaultArgRange();
// Look for the function declaration where the default argument was
// actually written, which may be a declaration prior to Old.
for (auto Older = PrevForDefaultArgs;
OldParam->hasInheritedDefaultArg(); /**/) {
Older = Older->getPreviousDecl();
OldParam = Older->getParamDecl(p);
}
Diag(OldParam->getLocation(), diag::note_previous_definition)
<< OldParam->getDefaultArgRange();
} else if (OldParamHasDfl) {
// Merge the old default argument into the new parameter unless the new
// function is a friend declaration in a class template. In the latter
// case the default arguments will be inherited when the friend
// declaration is instantiated.
if (New->getFriendObjectKind() == Decl::FOK_None ||
!New->getLexicalDeclContext()->isDependentContext()) {
// It's important to use getInit() here; getDefaultArg()
// strips off any top-level ExprWithCleanups.
NewParam->setHasInheritedDefaultArg();
if (OldParam->hasUnparsedDefaultArg())
NewParam->setUnparsedDefaultArg();
else if (OldParam->hasUninstantiatedDefaultArg())
NewParam->setUninstantiatedDefaultArg(
OldParam->getUninstantiatedDefaultArg());
else
NewParam->setDefaultArg(OldParam->getInit());
}
} else if (NewParamHasDfl) {
if (New->getDescribedFunctionTemplate()) {
// Paragraph 4, quoted above, only applies to non-template functions.
Diag(NewParam->getLocation(),
diag::err_param_default_argument_template_redecl)
<< NewParam->getDefaultArgRange();
Diag(PrevForDefaultArgs->getLocation(),
diag::note_template_prev_declaration)
<< false;
} else if (New->getTemplateSpecializationKind()
!= TSK_ImplicitInstantiation &&
New->getTemplateSpecializationKind() != TSK_Undeclared) {
// C++ [temp.expl.spec]p21:
// Default function arguments shall not be specified in a declaration
// or a definition for one of the following explicit specializations:
// - the explicit specialization of a function template;
// - the explicit specialization of a member function template;
// - the explicit specialization of a member function of a class
// template where the class template specialization to which the
// member function specialization belongs is implicitly
// instantiated.
Diag(NewParam->getLocation(), diag::err_template_spec_default_arg)
<< (New->getTemplateSpecializationKind() ==TSK_ExplicitSpecialization)
<< New->getDeclName()
<< NewParam->getDefaultArgRange();
} else if (New->getDeclContext()->isDependentContext()) {
// C++ [dcl.fct.default]p6 (DR217):
// Default arguments for a member function of a class template shall
// be specified on the initial declaration of the member function
// within the class template.
//
// Reading the tea leaves a bit in DR217 and its reference to DR205
// leads me to the conclusion that one cannot add default function
// arguments for an out-of-line definition of a member function of a
// dependent type.
int WhichKind = 2;
if (CXXRecordDecl *Record
= dyn_cast<CXXRecordDecl>(New->getDeclContext())) {
if (Record->getDescribedClassTemplate())
WhichKind = 0;
else if (isa<ClassTemplatePartialSpecializationDecl>(Record))
WhichKind = 1;
else
WhichKind = 2;
}
Diag(NewParam->getLocation(),
diag::err_param_default_argument_member_template_redecl)
<< WhichKind
<< NewParam->getDefaultArgRange();
}
}
}
// DR1344: If a default argument is added outside a class definition and that
// default argument makes the function a special member function, the program
// is ill-formed. This can only happen for constructors.
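// For example, given "struct A { A(int); };", a later out-of-line definition
// "A::A(int i = 0) {}" would turn A(int) into a default constructor and is
// diagnosed here.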
if (isa<CXXConstructorDecl>(New) &&
New->getMinRequiredArguments() < Old->getMinRequiredArguments()) {
CXXSpecialMember NewSM = getSpecialMember(cast<CXXMethodDecl>(New)),
OldSM = getSpecialMember(cast<CXXMethodDecl>(Old));
if (NewSM != OldSM) {
ParmVarDecl *NewParam = New->getParamDecl(New->getMinRequiredArguments());
assert(NewParam->hasDefaultArg());
Diag(NewParam->getLocation(), diag::err_default_arg_makes_ctor_special)
<< NewParam->getDefaultArgRange() << NewSM;
Diag(Old->getLocation(), diag::note_previous_declaration);
}
}
const FunctionDecl *Def;
// C++11 [dcl.constexpr]p1: If any declaration of a function or function
// template has a constexpr specifier then all its declarations shall
// contain the constexpr specifier.
if (New->isConstexpr() != Old->isConstexpr()) {
Diag(New->getLocation(), diag::err_constexpr_redecl_mismatch)
<< New << New->isConstexpr();
Diag(Old->getLocation(), diag::note_previous_declaration);
Invalid = true;
} else if (!Old->getMostRecentDecl()->isInlined() && New->isInlined() &&
Old->isDefined(Def) &&
// If a friend function is inlined but does not have 'inline'
// specifier, it is a definition. Do not report attribute conflict
// in this case, redefinition will be diagnosed later.
(New->isInlineSpecified() ||
New->getFriendObjectKind() == Decl::FOK_None)) {
// C++11 [dcl.fcn.spec]p4:
// If the definition of a function appears in a translation unit before its
// first declaration as inline, the program is ill-formed.
Diag(New->getLocation(), diag::err_inline_decl_follows_def) << New;
Diag(Def->getLocation(), diag::note_previous_definition);
Invalid = true;
}
// FIXME: It's not clear what should happen if multiple declarations of a
// deduction guide have different explicitness. For now at least we simply
// reject any case where the explicitness changes.
auto *NewGuide = dyn_cast<CXXDeductionGuideDecl>(New);
if (NewGuide && NewGuide->isExplicitSpecified() !=
cast<CXXDeductionGuideDecl>(Old)->isExplicitSpecified()) {
Diag(New->getLocation(), diag::err_deduction_guide_explicit_mismatch)
<< NewGuide->isExplicitSpecified();
Diag(Old->getLocation(), diag::note_previous_declaration);
}
// C++11 [dcl.fct.default]p4: If a friend declaration specifies a default
// argument expression, that declaration shall be a definition and shall be
// the only declaration of the function or function template in the
// translation unit.
if (Old->getFriendObjectKind() == Decl::FOK_Undeclared &&
functionDeclHasDefaultArgument(Old)) {
Diag(New->getLocation(), diag::err_friend_decl_with_def_arg_redeclared);
Diag(Old->getLocation(), diag::note_previous_declaration);
Invalid = true;
}
return Invalid;
}
NamedDecl *
Sema::ActOnDecompositionDeclarator(Scope *S, Declarator &D,
MultiTemplateParamsArg TemplateParamLists) {
assert(D.isDecompositionDeclarator());
const DecompositionDeclarator &Decomp = D.getDecompositionDeclarator();
// The syntax only allows a decomposition declarator as a simple-declaration
// or a for-range-declaration, but we parse it in more cases than that.
if (!D.mayHaveDecompositionDeclarator()) {
Diag(Decomp.getLSquareLoc(), diag::err_decomp_decl_context)
<< Decomp.getSourceRange();
return nullptr;
}
if (!TemplateParamLists.empty()) {
// FIXME: There's no rule against this, but there are also no rules that
// would actually make it usable, so we reject it for now.
Diag(TemplateParamLists.front()->getTemplateLoc(),
diag::err_decomp_decl_template);
return nullptr;
}
Diag(Decomp.getLSquareLoc(), getLangOpts().CPlusPlus1z
? diag::warn_cxx14_compat_decomp_decl
: diag::ext_decomp_decl)
<< Decomp.getSourceRange();
// The semantic context is always just the current context.
DeclContext *const DC = CurContext;
// C++1z [dcl.dcl]/8:
// The decl-specifier-seq shall contain only the type-specifier auto
// and cv-qualifiers.
auto &DS = D.getDeclSpec();
{
SmallVector<StringRef, 8> BadSpecifiers;
SmallVector<SourceLocation, 8> BadSpecifierLocs;
if (auto SCS = DS.getStorageClassSpec()) {
BadSpecifiers.push_back(DeclSpec::getSpecifierName(SCS));
BadSpecifierLocs.push_back(DS.getStorageClassSpecLoc());
}
if (auto TSCS = DS.getThreadStorageClassSpec()) {
BadSpecifiers.push_back(DeclSpec::getSpecifierName(TSCS));
BadSpecifierLocs.push_back(DS.getThreadStorageClassSpecLoc());
}
if (DS.isConstexprSpecified()) {
BadSpecifiers.push_back("constexpr");
BadSpecifierLocs.push_back(DS.getConstexprSpecLoc());
}
if (DS.isInlineSpecified()) {
BadSpecifiers.push_back("inline");
BadSpecifierLocs.push_back(DS.getInlineSpecLoc());
}
if (!BadSpecifiers.empty()) {
auto &&Err = Diag(BadSpecifierLocs.front(), diag::err_decomp_decl_spec);
Err << (int)BadSpecifiers.size()
<< llvm::join(BadSpecifiers.begin(), BadSpecifiers.end(), " ");
// Don't add FixItHints to remove the specifiers; we do still respect
// them when building the underlying variable.
for (auto Loc : BadSpecifierLocs)
Err << SourceRange(Loc, Loc);
}
// We can't recover from it being declared as a typedef.
if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef)
return nullptr;
}
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType R = TInfo->getType();
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_DeclarationType))
D.setInvalidType();
// The syntax only allows a single ref-qualifier prior to the decomposition
// declarator. No other declarator chunks are permitted. Also check the type
// specifier here.
if (DS.getTypeSpecType() != DeclSpec::TST_auto ||
D.hasGroupingParens() || D.getNumTypeObjects() > 1 ||
(D.getNumTypeObjects() == 1 &&
D.getTypeObject(0).Kind != DeclaratorChunk::Reference)) {
Diag(Decomp.getLSquareLoc(),
(D.hasGroupingParens() ||
(D.getNumTypeObjects() &&
D.getTypeObject(0).Kind == DeclaratorChunk::Paren))
? diag::err_decomp_decl_parens
: diag::err_decomp_decl_type)
<< R;
// In most cases, there's no actual problem with an explicitly-specified
// type, but a function type won't work here, and ActOnVariableDeclarator
// shouldn't be called for such a type.
if (R->isFunctionType())
D.setInvalidType();
}
// Build the BindingDecls.
SmallVector<BindingDecl*, 8> Bindings;
for (auto &B : D.getDecompositionDeclarator().bindings()) {
// Check for name conflicts.
DeclarationNameInfo NameInfo(B.Name, B.NameLoc);
LookupResult Previous(*this, NameInfo, LookupOrdinaryName,
ForRedeclaration);
LookupName(Previous, S,
/*CreateBuiltins*/DC->getRedeclContext()->isTranslationUnit());
// It's not permitted to shadow a template parameter name.
if (Previous.isSingleResult() &&
Previous.getFoundDecl()->isTemplateParameter()) {
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(),
Previous.getFoundDecl());
Previous.clear();
}
bool ConsiderLinkage = DC->isFunctionOrMethod() &&
DS.getStorageClassSpec() == DeclSpec::SCS_extern;
FilterLookupForScope(Previous, DC, S, ConsiderLinkage,
/*AllowInlineNamespace*/false);
if (!Previous.empty()) {
auto *Old = Previous.getRepresentativeDecl();
Diag(B.NameLoc, diag::err_redefinition) << B.Name;
Diag(Old->getLocation(), diag::note_previous_definition);
}
auto *BD = BindingDecl::Create(Context, DC, B.NameLoc, B.Name);
PushOnScopeChains(BD, S, true);
Bindings.push_back(BD);
ParsingInitForAutoVars.insert(BD);
}
// There are no prior lookup results for the variable itself, because it
// is unnamed.
DeclarationNameInfo NameInfo((IdentifierInfo *)nullptr,
Decomp.getLSquareLoc());
LookupResult Previous(*this, NameInfo, LookupOrdinaryName, ForRedeclaration);
// Build the variable that holds the non-decomposed object.
bool AddToScope = true;
NamedDecl *New =
ActOnVariableDeclarator(S, D, DC, TInfo, Previous,
MultiTemplateParamsArg(), AddToScope, Bindings);
CurContext->addHiddenDecl(New);
if (isInOpenMPDeclareTargetContext())
checkDeclIsAllowedInOpenMPTarget(nullptr, New);
return New;
}
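// For example, given "int a[2] = {1, 2}; auto [x, y] = a;", the helper below
// binds x and y to the array elements via the GetInit callback.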
static bool checkSimpleDecomposition(
Sema &S, ArrayRef<BindingDecl *> Bindings, ValueDecl *Src,
QualType DecompType, const llvm::APSInt &NumElems, QualType ElemType,
llvm::function_ref<ExprResult(SourceLocation, Expr *, unsigned)> GetInit) {
if ((int64_t)Bindings.size() != NumElems) {
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << NumElems.toString(10)
<< (NumElems < Bindings.size());
return true;
}
unsigned I = 0;
for (auto *B : Bindings) {
SourceLocation Loc = B->getLocation();
ExprResult E = S.BuildDeclRefExpr(Src, DecompType, VK_LValue, Loc);
if (E.isInvalid())
return true;
E = GetInit(Loc, E.get(), I++);
if (E.isInvalid())
return true;
B->setBinding(ElemType, E.get());
}
return false;
}
static bool checkArrayLikeDecomposition(Sema &S,
ArrayRef<BindingDecl *> Bindings,
ValueDecl *Src, QualType DecompType,
const llvm::APSInt &NumElems,
QualType ElemType) {
return checkSimpleDecomposition(
S, Bindings, Src, DecompType, NumElems, ElemType,
[&](SourceLocation Loc, Expr *Base, unsigned I) -> ExprResult {
ExprResult E = S.ActOnIntegerConstant(Loc, I);
if (E.isInvalid())
return ExprError();
return S.CreateBuiltinArraySubscriptExpr(Base, Loc, E.get(), Loc);
});
}
static bool checkArrayDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
ValueDecl *Src, QualType DecompType,
const ConstantArrayType *CAT) {
return checkArrayLikeDecomposition(S, Bindings, Src, DecompType,
llvm::APSInt(CAT->getSize()),
CAT->getElementType());
}
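// Editor's note: an illustrative example (not part of the original source) of
// array decomposition; each binding names the corresponding element:
//   int arr[2] = {1, 2};
//   auto [a, b] = arr;      // a refers to arr[0], b to arr[1]
//   auto [x] = arr;         // error: 1 binding for an array of 2 elements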
static bool checkVectorDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
ValueDecl *Src, QualType DecompType,
const VectorType *VT) {
return checkArrayLikeDecomposition(
S, Bindings, Src, DecompType, llvm::APSInt::get(VT->getNumElements()),
S.Context.getQualifiedType(VT->getElementType(),
DecompType.getQualifiers()));
}
static bool checkComplexDecomposition(Sema &S,
ArrayRef<BindingDecl *> Bindings,
ValueDecl *Src, QualType DecompType,
const ComplexType *CT) {
return checkSimpleDecomposition(
S, Bindings, Src, DecompType, llvm::APSInt::get(2),
S.Context.getQualifiedType(CT->getElementType(),
DecompType.getQualifiers()),
[&](SourceLocation Loc, Expr *Base, unsigned I) -> ExprResult {
return S.CreateBuiltinUnaryOp(Loc, I ? UO_Imag : UO_Real, Base);
});
}
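// Editor's note: an illustrative example (not part of the original source).
// Decomposing _Complex is a Clang extension; the two bindings are formed with
// __real and __imag respectively:
//   _Complex double z = 1.0;
//   auto [re, im] = z;      // re is __real z, im is __imag z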
static std::string printTemplateArgs(const PrintingPolicy &PrintingPolicy,
TemplateArgumentListInfo &Args) {
SmallString<128> SS;
llvm::raw_svector_ostream OS(SS);
bool First = true;
for (auto &Arg : Args.arguments()) {
if (!First)
OS << ", ";
Arg.getArgument().print(PrintingPolicy, OS);
First = false;
}
return OS.str();
}
static bool lookupStdTypeTraitMember(Sema &S, LookupResult &TraitMemberLookup,
SourceLocation Loc, StringRef Trait,
TemplateArgumentListInfo &Args,
unsigned DiagID) {
auto DiagnoseMissing = [&] {
if (DiagID)
S.Diag(Loc, DiagID) << printTemplateArgs(S.Context.getPrintingPolicy(),
Args);
return true;
};
// FIXME: Factor out duplication with lookupPromiseType in SemaCoroutine.
NamespaceDecl *Std = S.getStdNamespace();
if (!Std)
return DiagnoseMissing();
// Look up the trait itself, within namespace std. We can diagnose various
// problems with this lookup even if we've been asked to not diagnose a
// missing specialization, because this can only fail if the user has been
// declaring their own names in namespace std or we don't support the
// standard library implementation in use.
LookupResult Result(S, &S.PP.getIdentifierTable().get(Trait),
Loc, Sema::LookupOrdinaryName);
if (!S.LookupQualifiedName(Result, Std))
return DiagnoseMissing();
if (Result.isAmbiguous())
return true;
ClassTemplateDecl *TraitTD = Result.getAsSingle<ClassTemplateDecl>();
if (!TraitTD) {
Result.suppressDiagnostics();
NamedDecl *Found = *Result.begin();
S.Diag(Loc, diag::err_std_type_trait_not_class_template) << Trait;
S.Diag(Found->getLocation(), diag::note_declared_at);
return true;
}
// Build the template-id.
QualType TraitTy = S.CheckTemplateIdType(TemplateName(TraitTD), Loc, Args);
if (TraitTy.isNull())
return true;
if (!S.isCompleteType(Loc, TraitTy)) {
if (DiagID)
S.RequireCompleteType(
Loc, TraitTy, DiagID,
printTemplateArgs(S.Context.getPrintingPolicy(), Args));
return true;
}
CXXRecordDecl *RD = TraitTy->getAsCXXRecordDecl();
assert(RD && "specialization of class template is not a class?");
// Look up the member of the trait type.
S.LookupQualifiedName(TraitMemberLookup, RD);
return TraitMemberLookup.isAmbiguous();
}
static TemplateArgumentLoc
getTrivialIntegralTemplateArgument(Sema &S, SourceLocation Loc, QualType T,
uint64_t I) {
TemplateArgument Arg(S.Context, S.Context.MakeIntValue(I, T), T);
return S.getTrivialTemplateArgumentLoc(Arg, T, Loc);
}
static TemplateArgumentLoc
getTrivialTypeTemplateArgument(Sema &S, SourceLocation Loc, QualType T) {
return S.getTrivialTemplateArgumentLoc(TemplateArgument(T), QualType(), Loc);
}
namespace { enum class IsTupleLike { TupleLike, NotTupleLike, Error }; }
static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T,
llvm::APSInt &Size) {
EnterExpressionEvaluationContext ContextRAII(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
DeclarationName Value = S.PP.getIdentifierInfo("value");
LookupResult R(S, Value, Loc, Sema::LookupOrdinaryName);
// Form template argument list for tuple_size<T>.
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(getTrivialTypeTemplateArgument(S, Loc, T));
// If there's no tuple_size specialization, it's not tuple-like.
if (lookupStdTypeTraitMember(S, R, Loc, "tuple_size", Args, /*DiagID*/0))
return IsTupleLike::NotTupleLike;
// If we get this far, we've committed to the tuple interpretation, but
// we can still fail if there actually isn't a usable ::value.
struct ICEDiagnoser : Sema::VerifyICEDiagnoser {
LookupResult &R;
TemplateArgumentListInfo &Args;
ICEDiagnoser(LookupResult &R, TemplateArgumentListInfo &Args)
: R(R), Args(Args) {}
void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) {
S.Diag(Loc, diag::err_decomp_decl_std_tuple_size_not_constant)
<< printTemplateArgs(S.Context.getPrintingPolicy(), Args);
}
} Diagnoser(R, Args);
if (R.empty()) {
Diagnoser.diagnoseNotICE(S, Loc, SourceRange());
return IsTupleLike::Error;
}
ExprResult E =
S.BuildDeclarationNameExpr(CXXScopeSpec(), R, /*NeedsADL*/false);
if (E.isInvalid())
return IsTupleLike::Error;
E = S.VerifyIntegerConstantExpression(E.get(), &Size, Diagnoser, false);
if (E.isInvalid())
return IsTupleLike::Error;
return IsTupleLike::TupleLike;
}
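// Editor's note: an illustrative sketch (not part of the original source) of
// what the lookup above detects; the type 'Pair' is hypothetical:
//   struct Pair { int a; float b; };
//   namespace std {
//     template<> struct tuple_size<Pair>
//         : integral_constant<size_t, 2> {};
//   }
//   // isTupleLike now yields TupleLike with Size == 2 for 'Pair'.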
/// \return std::tuple_element<I, T>::type.
static QualType getTupleLikeElementType(Sema &S, SourceLocation Loc,
unsigned I, QualType T) {
// Form template argument list for tuple_element<I, T>.
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(
getTrivialIntegralTemplateArgument(S, Loc, S.Context.getSizeType(), I));
Args.addArgument(getTrivialTypeTemplateArgument(S, Loc, T));
DeclarationName TypeDN = S.PP.getIdentifierInfo("type");
LookupResult R(S, TypeDN, Loc, Sema::LookupOrdinaryName);
if (lookupStdTypeTraitMember(
S, R, Loc, "tuple_element", Args,
diag::err_decomp_decl_std_tuple_element_not_specialized))
return QualType();
auto *TD = R.getAsSingle<TypeDecl>();
if (!TD) {
R.suppressDiagnostics();
S.Diag(Loc, diag::err_decomp_decl_std_tuple_element_not_specialized)
<< printTemplateArgs(S.Context.getPrintingPolicy(), Args);
if (!R.empty())
S.Diag(R.getRepresentativeDecl()->getLocation(), diag::note_declared_at);
return QualType();
}
return S.Context.getTypeDeclType(TD);
}
namespace {
struct BindingDiagnosticTrap {
Sema &S;
DiagnosticErrorTrap Trap;
BindingDecl *BD;
BindingDiagnosticTrap(Sema &S, BindingDecl *BD)
: S(S), Trap(S.Diags), BD(BD) {}
~BindingDiagnosticTrap() {
if (Trap.hasErrorOccurred())
S.Diag(BD->getLocation(), diag::note_in_binding_decl_init) << BD;
}
};
}
static bool checkTupleLikeDecomposition(Sema &S,
ArrayRef<BindingDecl *> Bindings,
VarDecl *Src, QualType DecompType,
const llvm::APSInt &TupleSize) {
if ((int64_t)Bindings.size() != TupleSize) {
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << TupleSize.toString(10)
<< (TupleSize < Bindings.size());
return true;
}
if (Bindings.empty())
return false;
DeclarationName GetDN = S.PP.getIdentifierInfo("get");
// [dcl.decomp]p3:
// The unqualified-id get is looked up in the scope of E by class member
// access lookup
LookupResult MemberGet(S, GetDN, Src->getLocation(), Sema::LookupMemberName);
bool UseMemberGet = false;
if (S.isCompleteType(Src->getLocation(), DecompType)) {
if (auto *RD = DecompType->getAsCXXRecordDecl())
S.LookupQualifiedName(MemberGet, RD);
if (MemberGet.isAmbiguous())
return true;
UseMemberGet = !MemberGet.empty();
S.FilterAcceptableTemplateNames(MemberGet);
}
unsigned I = 0;
for (auto *B : Bindings) {
BindingDiagnosticTrap Trap(S, B);
SourceLocation Loc = B->getLocation();
ExprResult E = S.BuildDeclRefExpr(Src, DecompType, VK_LValue, Loc);
if (E.isInvalid())
return true;
// e is an lvalue if the type of the entity is an lvalue reference and
// an xvalue otherwise
if (!Src->getType()->isLValueReferenceType())
E = ImplicitCastExpr::Create(S.Context, E.get()->getType(), CK_NoOp,
E.get(), nullptr, VK_XValue);
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(
getTrivialIntegralTemplateArgument(S, Loc, S.Context.getSizeType(), I));
if (UseMemberGet) {
// if [lookup of member get] finds at least one declaration, the
// initializer is e.get<i-1>().
E = S.BuildMemberReferenceExpr(E.get(), DecompType, Loc, false,
CXXScopeSpec(), SourceLocation(), nullptr,
MemberGet, &Args, nullptr);
if (E.isInvalid())
return true;
E = S.ActOnCallExpr(nullptr, E.get(), Loc, None, Loc);
} else {
// Otherwise, the initializer is get<i-1>(e), where get is looked up
// in the associated namespaces.
Expr *Get = UnresolvedLookupExpr::Create(
S.Context, nullptr, NestedNameSpecifierLoc(), SourceLocation(),
DeclarationNameInfo(GetDN, Loc), /*RequiresADL*/true, &Args,
UnresolvedSetIterator(), UnresolvedSetIterator());
Expr *Arg = E.get();
E = S.ActOnCallExpr(nullptr, Get, Loc, Arg, Loc);
}
if (E.isInvalid())
return true;
Expr *Init = E.get();
// Given the type T designated by std::tuple_element<i - 1, E>::type,
QualType T = getTupleLikeElementType(S, Loc, I, DecompType);
if (T.isNull())
return true;
// each vi is a variable of type "reference to T" initialized with the
// initializer, where the reference is an lvalue reference if the
// initializer is an lvalue and an rvalue reference otherwise
QualType RefType =
S.BuildReferenceType(T, E.get()->isLValue(), Loc, B->getDeclName());
if (RefType.isNull())
return true;
auto *RefVD = VarDecl::Create(
S.Context, Src->getDeclContext(), Loc, Loc,
B->getDeclName().getAsIdentifierInfo(), RefType,
S.Context.getTrivialTypeSourceInfo(T, Loc), Src->getStorageClass());
RefVD->setLexicalDeclContext(Src->getLexicalDeclContext());
RefVD->setTSCSpec(Src->getTSCSpec());
RefVD->setImplicit();
if (Src->isInlineSpecified())
RefVD->setInlineSpecified();
RefVD->getLexicalDeclContext()->addHiddenDecl(RefVD);
InitializedEntity Entity = InitializedEntity::InitializeBinding(RefVD);
InitializationKind Kind = InitializationKind::CreateCopy(Loc, Loc);
InitializationSequence Seq(S, Entity, Kind, Init);
E = Seq.Perform(S, Entity, Kind, Init);
if (E.isInvalid())
return true;
E = S.ActOnFinishFullExpr(E.get(), Loc);
if (E.isInvalid())
return true;
RefVD->setInit(E.get());
RefVD->checkInitIsICE();
E = S.BuildDeclarationNameExpr(CXXScopeSpec(),
DeclarationNameInfo(B->getDeclName(), Loc),
RefVD);
if (E.isInvalid())
return true;
B->setBinding(T, E.get());
I++;
}
return false;
}
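// Editor's note: an illustrative example (not part of the original source).
// For a tuple-like type, each binding is initialized from e.get<i-1>() if
// member lookup for 'get' succeeds, and from get<i-1>(e) found by ADL
// otherwise; the bound types come from std::tuple_element:
//   auto [a, b] = std::tuple<int, int>(1, 2);   // uses ADL get<0>/get<1>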
/// Find the base class to decompose in a built-in decomposition of a class type.
/// This base class search is, unfortunately, not quite like any other that we
/// perform anywhere else in C++.
static const CXXRecordDecl *findDecomposableBaseClass(Sema &S,
SourceLocation Loc,
const CXXRecordDecl *RD,
CXXCastPath &BasePath) {
auto BaseHasFields = [](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) {
return Specifier->getType()->getAsCXXRecordDecl()->hasDirectFields();
};
const CXXRecordDecl *ClassWithFields = nullptr;
if (RD->hasDirectFields())
// [dcl.decomp]p4:
// Otherwise, all of E's non-static data members shall be public direct
// members of E ...
ClassWithFields = RD;
else {
// ... or of ...
CXXBasePaths Paths;
Paths.setOrigin(const_cast<CXXRecordDecl*>(RD));
if (!RD->lookupInBases(BaseHasFields, Paths)) {
// If no classes have fields, just decompose RD itself. (This will work
// if and only if zero bindings were provided.)
return RD;
}
CXXBasePath *BestPath = nullptr;
for (auto &P : Paths) {
if (!BestPath)
BestPath = &P;
else if (!S.Context.hasSameType(P.back().Base->getType(),
BestPath->back().Base->getType())) {
// ... the same ...
S.Diag(Loc, diag::err_decomp_decl_multiple_bases_with_members)
<< false << RD << BestPath->back().Base->getType()
<< P.back().Base->getType();
return nullptr;
} else if (P.Access < BestPath->Access) {
BestPath = &P;
}
}
// ... unambiguous ...
QualType BaseType = BestPath->back().Base->getType();
if (Paths.isAmbiguous(S.Context.getCanonicalType(BaseType))) {
S.Diag(Loc, diag::err_decomp_decl_ambiguous_base)
<< RD << BaseType << S.getAmbiguousPathsDisplayString(Paths);
return nullptr;
}
// ... public base class of E.
if (BestPath->Access != AS_public) {
S.Diag(Loc, diag::err_decomp_decl_non_public_base)
<< RD << BaseType;
for (auto &BS : *BestPath) {
if (BS.Base->getAccessSpecifier() != AS_public) {
S.Diag(BS.Base->getLocStart(), diag::note_access_constrained_by_path)
<< (BS.Base->getAccessSpecifier() == AS_protected)
<< (BS.Base->getAccessSpecifierAsWritten() == AS_none);
break;
}
}
return nullptr;
}
ClassWithFields = BaseType->getAsCXXRecordDecl();
S.BuildBasePathArray(Paths, BasePath);
}
// The above search did not check whether the selected class itself has base
// classes with fields, so check that now.
CXXBasePaths Paths;
if (ClassWithFields->lookupInBases(BaseHasFields, Paths)) {
S.Diag(Loc, diag::err_decomp_decl_multiple_bases_with_members)
<< (ClassWithFields == RD) << RD << ClassWithFields
<< Paths.front().back().Base->getType();
return nullptr;
}
return ClassWithFields;
}
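// Editor's note: an illustrative example (not part of the original source) of
// the base-class search above:
//   struct A { int x; };
//   struct B : A {};        // B has no direct fields
//   auto [x] = B{};         // OK: decomposes the unique base A
//   struct C : A { int y; };
//   auto [p, q] = C{};      // error: both C and its base A have members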
static bool checkMemberDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
ValueDecl *Src, QualType DecompType,
const CXXRecordDecl *RD) {
CXXCastPath BasePath;
RD = findDecomposableBaseClass(S, Src->getLocation(), RD, BasePath);
if (!RD)
return true;
QualType BaseType = S.Context.getQualifiedType(S.Context.getRecordType(RD),
DecompType.getQualifiers());
auto DiagnoseBadNumberOfBindings = [&]() -> bool {
unsigned NumFields =
std::count_if(RD->field_begin(), RD->field_end(),
[](FieldDecl *FD) { return !FD->isUnnamedBitfield(); });
assert(Bindings.size() != NumFields);
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << NumFields
<< (NumFields < Bindings.size());
return true;
};
// all of E's non-static data members shall be public [...] members,
// E shall not have an anonymous union member, ...
unsigned I = 0;
for (auto *FD : RD->fields()) {
if (FD->isUnnamedBitfield())
continue;
if (FD->isAnonymousStructOrUnion()) {
S.Diag(Src->getLocation(), diag::err_decomp_decl_anon_union_member)
<< DecompType << FD->getType()->isUnionType();
S.Diag(FD->getLocation(), diag::note_declared_at);
return true;
}
// We have a real field to bind.
if (I >= Bindings.size())
return DiagnoseBadNumberOfBindings();
auto *B = Bindings[I++];
SourceLocation Loc = B->getLocation();
if (FD->getAccess() != AS_public) {
S.Diag(Loc, diag::err_decomp_decl_non_public_member) << FD << DecompType;
// Determine whether the access specifier was explicit.
bool Implicit = true;
for (const auto *D : RD->decls()) {
if (declaresSameEntity(D, FD))
break;
if (isa<AccessSpecDecl>(D)) {
Implicit = false;
break;
}
}
S.Diag(FD->getLocation(), diag::note_access_natural)
<< (FD->getAccess() == AS_protected) << Implicit;
return true;
}
// Initialize the binding to Src.FD.
ExprResult E = S.BuildDeclRefExpr(Src, DecompType, VK_LValue, Loc);
if (E.isInvalid())
return true;
E = S.ImpCastExprToType(E.get(), BaseType, CK_UncheckedDerivedToBase,
VK_LValue, &BasePath);
if (E.isInvalid())
return true;
E = S.BuildFieldReferenceExpr(E.get(), /*IsArrow*/ false, Loc,
CXXScopeSpec(), FD,
DeclAccessPair::make(FD, FD->getAccess()),
DeclarationNameInfo(FD->getDeclName(), Loc));
if (E.isInvalid())
return true;
// If the type of the member is T, the referenced type is cv T, where cv is
// the cv-qualification of the decomposition expression.
//
// FIXME: We resolve a defect here: if the field is mutable, we do not add
// 'const' to the type of the field.
Qualifiers Q = DecompType.getQualifiers();
if (FD->isMutable())
Q.removeConst();
B->setBinding(S.BuildQualifiedType(FD->getType(), Loc, Q), E.get());
}
if (I != Bindings.size())
return DiagnoseBadNumberOfBindings();
return false;
}
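// Editor's note: illustrative examples (not part of the original source) of
// the member checks above:
//   struct S { int a; private: int b; };
//   auto [x, y] = S{};      // error: 'b' is not a public member
//   struct U { union { int i; float f; }; };
//   auto [z] = U{};         // error: anonymous union member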
void Sema::CheckCompleteDecompositionDeclaration(DecompositionDecl *DD) {
QualType DecompType = DD->getType();
// If the type of the decomposition is dependent, then so is the type of
// each binding.
if (DecompType->isDependentType()) {
for (auto *B : DD->bindings())
B->setType(Context.DependentTy);
return;
}
DecompType = DecompType.getNonReferenceType();
ArrayRef<BindingDecl*> Bindings = DD->bindings();
// C++1z [dcl.decomp]/2:
// If E is an array type [...]
// As an extension, we also support decomposition of built-in complex and
// vector types.
if (auto *CAT = Context.getAsConstantArrayType(DecompType)) {
if (checkArrayDecomposition(*this, Bindings, DD, DecompType, CAT))
DD->setInvalidDecl();
return;
}
if (auto *VT = DecompType->getAs<VectorType>()) {
if (checkVectorDecomposition(*this, Bindings, DD, DecompType, VT))
DD->setInvalidDecl();
return;
}
if (auto *CT = DecompType->getAs<ComplexType>()) {
if (checkComplexDecomposition(*this, Bindings, DD, DecompType, CT))
DD->setInvalidDecl();
return;
}
// C++1z [dcl.decomp]/3:
// if the expression std::tuple_size<E>::value is a well-formed integral
// constant expression, [...]
llvm::APSInt TupleSize(32);
switch (isTupleLike(*this, DD->getLocation(), DecompType, TupleSize)) {
case IsTupleLike::Error:
DD->setInvalidDecl();
return;
case IsTupleLike::TupleLike:
if (checkTupleLikeDecomposition(*this, Bindings, DD, DecompType, TupleSize))
DD->setInvalidDecl();
return;
case IsTupleLike::NotTupleLike:
break;
}
// C++1z [dcl.dcl]/8:
// [E shall be of array or non-union class type]
CXXRecordDecl *RD = DecompType->getAsCXXRecordDecl();
if (!RD || RD->isUnion()) {
Diag(DD->getLocation(), diag::err_decomp_decl_unbindable_type)
<< DD << !RD << DecompType;
DD->setInvalidDecl();
return;
}
// C++1z [dcl.decomp]/4:
// all of E's non-static data members shall be [...] direct members of
// E or of the same unambiguous public base class of E, ...
if (checkMemberDecomposition(*this, Bindings, DD, DecompType, RD))
DD->setInvalidDecl();
}
/// \brief Merge the exception specifications of two variable declarations.
///
/// This is called when there's a redeclaration of a VarDecl. The function
/// checks whether the redeclaration has an exception specification, validates
/// compatibility, and merges the specifications if necessary.
void Sema::MergeVarDeclExceptionSpecs(VarDecl *New, VarDecl *Old) {
// Shortcut if exceptions are disabled.
if (!getLangOpts().CXXExceptions)
return;
assert(Context.hasSameType(New->getType(), Old->getType()) &&
"Should only be called if types are otherwise the same.");
QualType NewType = New->getType();
QualType OldType = Old->getType();
// We're only interested in pointers and references to functions, as well
// as pointers to member functions.
if (const ReferenceType *R = NewType->getAs<ReferenceType>()) {
NewType = R->getPointeeType();
OldType = OldType->getAs<ReferenceType>()->getPointeeType();
} else if (const PointerType *P = NewType->getAs<PointerType>()) {
NewType = P->getPointeeType();
OldType = OldType->getAs<PointerType>()->getPointeeType();
} else if (const MemberPointerType *M = NewType->getAs<MemberPointerType>()) {
NewType = M->getPointeeType();
OldType = OldType->getAs<MemberPointerType>()->getPointeeType();
}
if (!NewType->isFunctionProtoType())
return;
// There are many special cases for functions. For function pointers, system
// libraries are hopefully not as broken, so we don't need those workarounds
// here.
if (CheckEquivalentExceptionSpec(
OldType->getAs<FunctionProtoType>(), Old->getLocation(),
NewType->getAs<FunctionProtoType>(), New->getLocation())) {
New->setInvalidDecl();
}
}
/// CheckCXXDefaultArguments - Verify that the default arguments for a
/// function declaration are well-formed according to C++
/// [dcl.fct.default].
void Sema::CheckCXXDefaultArguments(FunctionDecl *FD) {
unsigned NumParams = FD->getNumParams();
unsigned p;
// Find first parameter with a default argument
for (p = 0; p < NumParams; ++p) {
ParmVarDecl *Param = FD->getParamDecl(p);
if (Param->hasDefaultArg())
break;
}
// C++11 [dcl.fct.default]p4:
// In a given function declaration, each parameter subsequent to a parameter
// with a default argument shall have a default argument supplied in this or
// a previous declaration or shall be a function parameter pack. A default
// argument shall not be redefined by a later declaration (not even to the
// same value).
unsigned LastMissingDefaultArg = 0;
for (; p < NumParams; ++p) {
ParmVarDecl *Param = FD->getParamDecl(p);
if (!Param->hasDefaultArg() && !Param->isParameterPack()) {
if (Param->isInvalidDecl())
/* We already complained about this parameter. */;
else if (Param->getIdentifier())
Diag(Param->getLocation(),
diag::err_param_default_argument_missing_name)
<< Param->getIdentifier();
else
Diag(Param->getLocation(),
diag::err_param_default_argument_missing);
LastMissingDefaultArg = p;
}
}
if (LastMissingDefaultArg > 0) {
// Some default arguments were missing. Clear out all of the
// default arguments up to (and including) the last missing
// default argument, so that we leave the function parameters
// in a semantically valid state.
for (p = 0; p <= LastMissingDefaultArg; ++p) {
ParmVarDecl *Param = FD->getParamDecl(p);
if (Param->hasDefaultArg()) {
Param->setDefaultArg(nullptr);
}
}
}
}
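// Editor's note: an illustrative example (not part of the original source) of
// C++11 [dcl.fct.default]p4 as enforced above:
//   void f(int a, int b = 1, int c);      // error: 'c' is missing a default
//                                         // argument
//   void g(int a, int b = 1, int c = 2);  // OK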
// CheckConstexprParameterTypes - Check whether a function's parameter types
// are all literal types. If so, return true. If not, produce a suitable
// diagnostic and return false.
static bool CheckConstexprParameterTypes(Sema &SemaRef,
const FunctionDecl *FD) {
unsigned ArgIndex = 0;
const FunctionProtoType *FT = FD->getType()->getAs<FunctionProtoType>();
for (FunctionProtoType::param_type_iterator i = FT->param_type_begin(),
e = FT->param_type_end();
i != e; ++i, ++ArgIndex) {
const ParmVarDecl *PD = FD->getParamDecl(ArgIndex);
SourceLocation ParamLoc = PD->getLocation();
if (!(*i)->isDependentType() &&
SemaRef.RequireLiteralType(ParamLoc, *i,
diag::err_constexpr_non_literal_param,
ArgIndex+1, PD->getSourceRange(),
isa<CXXConstructorDecl>(FD)))
return false;
}
return true;
}
/// \brief Get diagnostic %select index for tag kind for
/// record diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getRecordDiagFromTagKind(TagTypeKind Tag) {
switch (Tag) {
case TTK_Struct: return 0;
case TTK_Interface: return 1;
case TTK_Class: return 2;
default: llvm_unreachable("Invalid tag kind for record diagnostic!");
}
}
// CheckConstexprFunctionDecl - Check whether a function declaration satisfies
// the requirements of a constexpr function definition or a constexpr
// constructor definition. If so, return true. If not, produce appropriate
// diagnostics and return false.
//
// This implements C++11 [dcl.constexpr]p3,4, as amended by DR1360.
bool Sema::CheckConstexprFunctionDecl(const FunctionDecl *NewFD) {
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewFD);
if (MD && MD->isInstance()) {
// C++11 [dcl.constexpr]p4:
// The definition of a constexpr constructor shall satisfy the following
// constraints:
// - the class shall not have any virtual base classes;
const CXXRecordDecl *RD = MD->getParent();
if (RD->getNumVBases()) {
Diag(NewFD->getLocation(), diag::err_constexpr_virtual_base)
<< isa<CXXConstructorDecl>(NewFD)
<< getRecordDiagFromTagKind(RD->getTagKind()) << RD->getNumVBases();
for (const auto &I : RD->vbases())
Diag(I.getLocStart(),
diag::note_constexpr_virtual_base_here) << I.getSourceRange();
return false;
}
}
if (!isa<CXXConstructorDecl>(NewFD)) {
// C++11 [dcl.constexpr]p3:
// The definition of a constexpr function shall satisfy the following
// constraints:
// - it shall not be virtual;
const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(NewFD);
if (Method && Method->isVirtual()) {
Method = Method->getCanonicalDecl();
Diag(Method->getLocation(), diag::err_constexpr_virtual);
// If it's not obvious why this function is virtual, find an overridden
// function which uses the 'virtual' keyword.
const CXXMethodDecl *WrittenVirtual = Method;
while (!WrittenVirtual->isVirtualAsWritten())
WrittenVirtual = *WrittenVirtual->begin_overridden_methods();
if (WrittenVirtual != Method)
Diag(WrittenVirtual->getLocation(),
diag::note_overridden_virtual_function);
return false;
}
// - its return type shall be a literal type;
QualType RT = NewFD->getReturnType();
if (!RT->isDependentType() &&
RequireLiteralType(NewFD->getLocation(), RT,
diag::err_constexpr_non_literal_return))
return false;
}
// - each of its parameter types shall be a literal type;
if (!CheckConstexprParameterTypes(*this, NewFD))
return false;
return true;
}
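// Editor's note: illustrative examples (not part of the original source) of
// declarations rejected by the C++11/C++14 rules implemented above:
//   struct Base {};
//   struct V : virtual Base { constexpr V(); };    // error: virtual base
//   struct W { virtual constexpr int f() const; }; // error: constexpr
//                                                  // function can't be virtual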
/// Check that the given declaration statement is legal within a constexpr
/// function body. C++11 [dcl.constexpr]p3,p4, and C++1y [dcl.constexpr]p3.
///
/// \return true if the body is OK (maybe only as an extension), false if we
/// have diagnosed a problem.
static bool CheckConstexprDeclStmt(Sema &SemaRef, const FunctionDecl *Dcl,
DeclStmt *DS, SourceLocation &Cxx1yLoc) {
// C++11 [dcl.constexpr]p3 and p4:
// The definition of a constexpr function(p3) or constructor(p4) [...] shall
// contain only
for (const auto *DclIt : DS->decls()) {
switch (DclIt->getKind()) {
case Decl::StaticAssert:
case Decl::Using:
case Decl::UsingShadow:
case Decl::UsingDirective:
case Decl::UnresolvedUsingTypename:
case Decl::UnresolvedUsingValue:
// - static_assert-declarations
// - using-declarations,
// - using-directives,
continue;
case Decl::Typedef:
case Decl::TypeAlias: {
// - typedef declarations and alias-declarations that do not define
// classes or enumerations,
const auto *TN = cast<TypedefNameDecl>(DclIt);
if (TN->getUnderlyingType()->isVariablyModifiedType()) {
// Don't allow variably-modified types in constexpr functions.
TypeLoc TL = TN->getTypeSourceInfo()->getTypeLoc();
SemaRef.Diag(TL.getBeginLoc(), diag::err_constexpr_vla)
<< TL.getSourceRange() << TL.getType()
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
continue;
}
case Decl::Enum:
case Decl::CXXRecord:
// C++1y allows types to be defined, not just declared.
if (cast<TagDecl>(DclIt)->isThisDeclarationADefinition())
SemaRef.Diag(DS->getLocStart(),
SemaRef.getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_type_definition
: diag::ext_constexpr_type_definition)
<< isa<CXXConstructorDecl>(Dcl);
continue;
case Decl::EnumConstant:
case Decl::IndirectField:
case Decl::ParmVar:
// These can only appear with other declarations which are banned in
// C++11 and permitted in C++1y, so ignore them.
continue;
case Decl::Var:
case Decl::Decomposition: {
// C++1y [dcl.constexpr]p3 allows anything except:
// a definition of a variable of non-literal type or of static or
// thread storage duration or for which no initialization is performed.
const auto *VD = cast<VarDecl>(DclIt);
if (VD->isThisDeclarationADefinition()) {
if (VD->isStaticLocal()) {
SemaRef.Diag(VD->getLocation(),
diag::err_constexpr_local_var_static)
<< isa<CXXConstructorDecl>(Dcl)
<< (VD->getTLSKind() == VarDecl::TLS_Dynamic);
return false;
}
if (!VD->getType()->isDependentType() &&
SemaRef.RequireLiteralType(
VD->getLocation(), VD->getType(),
diag::err_constexpr_local_var_non_literal_type,
isa<CXXConstructorDecl>(Dcl)))
return false;
if (!VD->getType()->isDependentType() &&
!VD->hasInit() && !VD->isCXXForRangeDecl()) {
SemaRef.Diag(VD->getLocation(),
diag::err_constexpr_local_var_no_init)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
}
SemaRef.Diag(VD->getLocation(),
SemaRef.getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_local_var
: diag::ext_constexpr_local_var)
<< isa<CXXConstructorDecl>(Dcl);
continue;
}
case Decl::NamespaceAlias:
case Decl::Function:
// These are disallowed in C++11 and permitted in C++1y. Allow them
// everywhere as an extension.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = DS->getLocStart();
continue;
default:
SemaRef.Diag(DS->getLocStart(), diag::err_constexpr_body_invalid_stmt)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
}
return true;
}
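// Editor's note: an illustrative example (not part of the original source).
// C++1y permits mutable locals in a constexpr body; C++11 accepts them only
// as an extension, and statics are always rejected:
//   constexpr int f(int n) {
//     int k = n;            // OK in C++14; extension in C++11
//     static int s = 0;     // error: static storage duration
//     return k;
//   }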
/// Check that the given field is initialized within a constexpr constructor.
///
/// \param Dcl The constexpr constructor being checked.
/// \param Field The field being checked. This may be a member of an anonymous
/// struct or union nested within the class being checked.
/// \param Inits All declarations, including anonymous struct/union members and
/// indirect members, for which any initialization was provided.
/// \param Diagnosed Set to true if an error is produced.
static void CheckConstexprCtorInitializer(Sema &SemaRef,
const FunctionDecl *Dcl,
FieldDecl *Field,
llvm::SmallSet<Decl*, 16> &Inits,
bool &Diagnosed) {
if (Field->isInvalidDecl())
return;
if (Field->isUnnamedBitfield())
return;
// Anonymous unions with no variant members and empty anonymous structs do not
// need to be explicitly initialized. FIXME: Anonymous structs that contain no
// indirect fields don't need initializing.
if (Field->isAnonymousStructOrUnion() &&
(Field->getType()->isUnionType()
? !Field->getType()->getAsCXXRecordDecl()->hasVariantMembers()
: Field->getType()->getAsCXXRecordDecl()->isEmpty()))
return;
if (!Inits.count(Field)) {
if (!Diagnosed) {
SemaRef.Diag(Dcl->getLocation(), diag::err_constexpr_ctor_missing_init);
Diagnosed = true;
}
SemaRef.Diag(Field->getLocation(), diag::note_constexpr_ctor_missing_init);
} else if (Field->isAnonymousStructOrUnion()) {
const RecordDecl *RD = Field->getType()->castAs<RecordType>()->getDecl();
for (auto *I : RD->fields())
// If an anonymous union contains an anonymous struct of which any member
// is initialized, all members must be initialized.
if (!RD->isUnion() || Inits.count(I))
CheckConstexprCtorInitializer(SemaRef, Dcl, I, Inits, Diagnosed);
}
}
/// Check that the provided statement is allowed in a constexpr function
/// definition.
static bool
CheckConstexprFunctionStmt(Sema &SemaRef, const FunctionDecl *Dcl, Stmt *S,
SmallVectorImpl<SourceLocation> &ReturnStmts,
SourceLocation &Cxx1yLoc) {
// - its function-body shall be [...] a compound-statement that contains only
switch (S->getStmtClass()) {
case Stmt::NullStmtClass:
// - null statements,
return true;
case Stmt::DeclStmtClass:
// - static_assert-declarations
// - using-declarations,
// - using-directives,
// - typedef declarations and alias-declarations that do not define
// classes or enumerations,
if (!CheckConstexprDeclStmt(SemaRef, Dcl, cast<DeclStmt>(S), Cxx1yLoc))
return false;
return true;
case Stmt::ReturnStmtClass:
// - and exactly one return statement;
if (isa<CXXConstructorDecl>(Dcl)) {
// C++1y allows return statements in constexpr constructors.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
return true;
}
ReturnStmts.push_back(S->getLocStart());
return true;
case Stmt::CompoundStmtClass: {
// C++1y allows compound-statements.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
CompoundStmt *CompStmt = cast<CompoundStmt>(S);
for (auto *BodyIt : CompStmt->body()) {
if (!CheckConstexprFunctionStmt(SemaRef, Dcl, BodyIt, ReturnStmts,
Cxx1yLoc))
return false;
}
return true;
}
case Stmt::AttributedStmtClass:
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
return true;
case Stmt::IfStmtClass: {
// C++1y allows if-statements.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
IfStmt *If = cast<IfStmt>(S);
if (!CheckConstexprFunctionStmt(SemaRef, Dcl, If->getThen(), ReturnStmts,
Cxx1yLoc))
return false;
if (If->getElse() &&
!CheckConstexprFunctionStmt(SemaRef, Dcl, If->getElse(), ReturnStmts,
Cxx1yLoc))
return false;
return true;
}
case Stmt::WhileStmtClass:
case Stmt::DoStmtClass:
case Stmt::ForStmtClass:
case Stmt::CXXForRangeStmtClass:
case Stmt::ContinueStmtClass:
// C++1y allows all of these. We don't allow them as extensions in C++11,
// because they don't make sense without variable mutation.
if (!SemaRef.getLangOpts().CPlusPlus14)
break;
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
for (Stmt *SubStmt : S->children())
if (SubStmt &&
!CheckConstexprFunctionStmt(SemaRef, Dcl, SubStmt, ReturnStmts,
Cxx1yLoc))
return false;
return true;
case Stmt::SwitchStmtClass:
case Stmt::CaseStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::BreakStmtClass:
// C++1y allows switch-statements, and since they don't need variable
// mutation, we can reasonably allow them in C++11 as an extension.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
for (Stmt *SubStmt : S->children())
if (SubStmt &&
!CheckConstexprFunctionStmt(SemaRef, Dcl, SubStmt, ReturnStmts,
Cxx1yLoc))
return false;
return true;
default:
if (!isa<Expr>(S))
break;
// C++1y allows expression-statements.
if (!Cxx1yLoc.isValid())
Cxx1yLoc = S->getLocStart();
return true;
}
SemaRef.Diag(S->getLocStart(), diag::err_constexpr_body_invalid_stmt)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
/// Check that the body of the given constexpr function declaration contains
/// only the permitted types of statement. C++11 [dcl.constexpr]p3,p4.
///
/// \return true if the body is OK, false if we have diagnosed a problem.
bool Sema::CheckConstexprFunctionBody(const FunctionDecl *Dcl, Stmt *Body) {
if (isa<CXXTryStmt>(Body)) {
// C++11 [dcl.constexpr]p3:
// The definition of a constexpr function shall satisfy the following
// constraints: [...]
// - its function-body shall be = delete, = default, or a
// compound-statement
//
// C++11 [dcl.constexpr]p4:
// In the definition of a constexpr constructor, [...]
// - its function-body shall not be a function-try-block;
Diag(Body->getLocStart(), diag::err_constexpr_function_try_block)
<< isa<CXXConstructorDecl>(Dcl);
return false;
}
SmallVector<SourceLocation, 4> ReturnStmts;
// - its function-body shall be [...] a compound-statement that contains only
// [... list of cases ...]
CompoundStmt *CompBody = cast<CompoundStmt>(Body);
SourceLocation Cxx1yLoc;
for (auto *BodyIt : CompBody->body()) {
if (!CheckConstexprFunctionStmt(*this, Dcl, BodyIt, ReturnStmts, Cxx1yLoc))
return false;
}
if (Cxx1yLoc.isValid())
Diag(Cxx1yLoc,
getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_body_invalid_stmt
: diag::ext_constexpr_body_invalid_stmt)
<< isa<CXXConstructorDecl>(Dcl);
if (const CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(Dcl)) {
const CXXRecordDecl *RD = Constructor->getParent();
// DR1359:
// - every non-variant non-static data member and base class sub-object
// shall be initialized;
// DR1460:
// - if the class is a union having variant members, exactly one of them
// shall be initialized;
if (RD->isUnion()) {
if (Constructor->getNumCtorInitializers() == 0 &&
RD->hasVariantMembers()) {
Diag(Dcl->getLocation(), diag::err_constexpr_union_ctor_no_init);
return false;
}
} else if (!Constructor->isDependentContext() &&
!Constructor->isDelegatingConstructor()) {
assert(RD->getNumVBases() == 0 && "constexpr ctor with virtual bases");
// Skip detailed checking if we have enough initializers, and we would
// allow at most one initializer per member.
bool AnyAnonStructUnionMembers = false;
unsigned Fields = 0;
for (CXXRecordDecl::field_iterator I = RD->field_begin(),
E = RD->field_end(); I != E; ++I, ++Fields) {
if (I->isAnonymousStructOrUnion()) {
AnyAnonStructUnionMembers = true;
break;
}
}
// DR1460:
// - if the class is a union-like class, but is not a union, for each of
// its anonymous union members having variant members, exactly one of
// them shall be initialized;
if (AnyAnonStructUnionMembers ||
Constructor->getNumCtorInitializers() != RD->getNumBases() + Fields) {
// Check initialization of non-static data members. Base classes are
// always initialized so do not need to be checked. Dependent bases
// might not have initializers in the member initializer list.
llvm::SmallSet<Decl*, 16> Inits;
for (const auto *I: Constructor->inits()) {
if (FieldDecl *FD = I->getMember())
Inits.insert(FD);
else if (IndirectFieldDecl *ID = I->getIndirectMember())
Inits.insert(ID->chain_begin(), ID->chain_end());
}
bool Diagnosed = false;
for (auto *I : RD->fields())
CheckConstexprCtorInitializer(*this, Dcl, I, Inits, Diagnosed);
if (Diagnosed)
return false;
}
}
} else {
if (ReturnStmts.empty()) {
// C++1y doesn't require constexpr functions to contain a 'return'
// statement. We still do, unless the return type might be void, because
// otherwise if there's no return statement, the function cannot
// be used in a core constant expression.
bool OK = getLangOpts().CPlusPlus14 &&
(Dcl->getReturnType()->isVoidType() ||
Dcl->getReturnType()->isDependentType());
Diag(Dcl->getLocation(),
OK ? diag::warn_cxx11_compat_constexpr_body_no_return
: diag::err_constexpr_body_no_return);
if (!OK)
return false;
} else if (ReturnStmts.size() > 1) {
Diag(ReturnStmts.back(),
getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_constexpr_body_multiple_return
: diag::ext_constexpr_body_multiple_return);
for (unsigned I = 0; I < ReturnStmts.size() - 1; ++I)
Diag(ReturnStmts[I], diag::note_constexpr_body_previous_return);
}
}
// C++11 [dcl.constexpr]p5:
// if no function argument values exist such that the function invocation
// substitution would produce a constant expression, the program is
// ill-formed; no diagnostic required.
// C++11 [dcl.constexpr]p3:
// - every constructor call and implicit conversion used in initializing the
// return value shall be one of those allowed in a constant expression.
// C++11 [dcl.constexpr]p4:
// - every constructor involved in initializing non-static data members and
// base class sub-objects shall be a constexpr constructor.
SmallVector<PartialDiagnosticAt, 8> Diags;
if (!Expr::isPotentialConstantExpr(Dcl, Diags)) {
Diag(Dcl->getLocation(), diag::ext_constexpr_function_never_constant_expr)
<< isa<CXXConstructorDecl>(Dcl);
for (size_t I = 0, N = Diags.size(); I != N; ++I)
Diag(Diags[I].first, Diags[I].second);
// Don't return false here: we allow this for compatibility in
// system headers.
}
return true;
}
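// Editor's note: an illustrative example (not part of the original source) of
// the return-statement rules enforced above:
//   constexpr int f(bool b) {
//     if (b) return 1;      // C++14: fine; C++11: the if-statement and the
//     return 0;             // second return are accepted only as extensions
//   }
//   constexpr void g() {}   // C++14: no return needed for a void return type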
/// isCurrentClassName - Determine whether the identifier II is the
/// name of the class type currently being defined. In the case of
/// nested classes, this will only return true if II is the name of
/// the innermost class.
bool Sema::isCurrentClassName(const IdentifierInfo &II, Scope *,
const CXXScopeSpec *SS) {
assert(getLangOpts().CPlusPlus && "No class names in C!");
CXXRecordDecl *CurDecl;
if (SS && SS->isSet() && !SS->isInvalid()) {
DeclContext *DC = computeDeclContext(*SS, true);
CurDecl = dyn_cast_or_null<CXXRecordDecl>(DC);
} else
CurDecl = dyn_cast_or_null<CXXRecordDecl>(CurContext);
if (CurDecl && CurDecl->getIdentifier())
return &II == CurDecl->getIdentifier();
return false;
}
/// \brief Determine whether the identifier II is a typo for the name of
/// the class type currently being defined. If so, update it to the identifier
/// that should have been used.
bool Sema::isCurrentClassNameTypo(IdentifierInfo *&II, const CXXScopeSpec *SS) {
assert(getLangOpts().CPlusPlus && "No class names in C!");
if (!getLangOpts().SpellChecking)
return false;
CXXRecordDecl *CurDecl;
if (SS && SS->isSet() && !SS->isInvalid()) {
DeclContext *DC = computeDeclContext(*SS, true);
CurDecl = dyn_cast_or_null<CXXRecordDecl>(DC);
} else
CurDecl = dyn_cast_or_null<CXXRecordDecl>(CurContext);
if (CurDecl && CurDecl->getIdentifier() && II != CurDecl->getIdentifier() &&
3 * II->getName().edit_distance(CurDecl->getIdentifier()->getName())
< II->getLength()) {
II = CurDecl->getIdentifier();
return true;
}
return false;
}
/// \brief Determine whether the given class is a base class of the given
/// class, including looking at dependent bases.
static bool findCircularInheritance(const CXXRecordDecl *Class,
const CXXRecordDecl *Current) {
SmallVector<const CXXRecordDecl*, 8> Queue;
Class = Class->getCanonicalDecl();
while (true) {
for (const auto &I : Current->bases()) {
CXXRecordDecl *Base = I.getType()->getAsCXXRecordDecl();
if (!Base)
continue;
Base = Base->getDefinition();
if (!Base)
continue;
if (Base->getCanonicalDecl() == Class)
return true;
Queue.push_back(Base);
}
if (Queue.empty())
return false;
Current = Queue.pop_back_val();
}
return false;
}
/// \brief Check the validity of a C++ base class specifier.
///
/// \returns a new CXXBaseSpecifier if well-formed, emits diagnostics
/// and returns NULL otherwise.
CXXBaseSpecifier *
Sema::CheckBaseSpecifier(CXXRecordDecl *Class,
SourceRange SpecifierRange,
bool Virtual, AccessSpecifier Access,
TypeSourceInfo *TInfo,
SourceLocation EllipsisLoc) {
QualType BaseType = TInfo->getType();
// C++ [class.union]p1:
// A union shall not have base classes.
if (Class->isUnion()) {
Diag(Class->getLocation(), diag::err_base_clause_on_union)
<< SpecifierRange;
return nullptr;
}
if (EllipsisLoc.isValid() &&
!TInfo->getType()->containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< TInfo->getTypeLoc().getSourceRange();
EllipsisLoc = SourceLocation();
}
SourceLocation BaseLoc = TInfo->getTypeLoc().getBeginLoc();
if (BaseType->isDependentType()) {
// Make sure that we don't have circular inheritance among our dependent
// bases. For non-dependent bases, the check for completeness below handles
// this.
if (CXXRecordDecl *BaseDecl = BaseType->getAsCXXRecordDecl()) {
if (BaseDecl->getCanonicalDecl() == Class->getCanonicalDecl() ||
((BaseDecl = BaseDecl->getDefinition()) &&
findCircularInheritance(Class, BaseDecl))) {
Diag(BaseLoc, diag::err_circular_inheritance)
<< BaseType << Context.getTypeDeclType(Class);
if (BaseDecl->getCanonicalDecl() != Class->getCanonicalDecl())
Diag(BaseDecl->getLocation(), diag::note_previous_decl)
<< BaseType;
return nullptr;
}
}
return new (Context) CXXBaseSpecifier(SpecifierRange, Virtual,
Class->getTagKind() == TTK_Class,
Access, TInfo, EllipsisLoc);
}
// Base specifiers must be record types.
if (!BaseType->isRecordType()) {
Diag(BaseLoc, diag::err_base_must_be_class) << SpecifierRange;
return nullptr;
}
// C++ [class.union]p1:
// A union shall not be used as a base class.
if (BaseType->isUnionType()) {
Diag(BaseLoc, diag::err_union_as_base_class) << SpecifierRange;
return nullptr;
}
// For the MS ABI, propagate DLL attributes to base class templates.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
if (Attr *ClassAttr = getDLLAttr(Class)) {
if (auto *BaseTemplate = dyn_cast_or_null<ClassTemplateSpecializationDecl>(
BaseType->getAsCXXRecordDecl())) {
propagateDLLAttrToBaseClassTemplate(Class, ClassAttr, BaseTemplate,
BaseLoc);
}
}
}
// C++ [class.derived]p2:
// The class-name in a base-specifier shall not be an incompletely
// defined class.
if (RequireCompleteType(BaseLoc, BaseType,
diag::err_incomplete_base_class, SpecifierRange)) {
Class->setInvalidDecl();
return nullptr;
}
// If the base class is polymorphic or non-empty, the derived class is as well.
RecordDecl *BaseDecl = BaseType->getAs<RecordType>()->getDecl();
assert(BaseDecl && "Record type has no declaration");
BaseDecl = BaseDecl->getDefinition();
assert(BaseDecl && "Base type is not incomplete, but has no definition");
CXXRecordDecl *CXXBaseDecl = cast<CXXRecordDecl>(BaseDecl);
assert(CXXBaseDecl && "Base type is not a C++ type");
// A class which contains a flexible array member is not suitable for use as a
// base class:
// - If the layout determines that a base comes before another base,
// the flexible array member would index into the subsequent base.
// - If the layout determines that the base comes before the derived class,
// the flexible array member would index into the derived class.
if (CXXBaseDecl->hasFlexibleArrayMember()) {
Diag(BaseLoc, diag::err_base_class_has_flexible_array_member)
<< CXXBaseDecl->getDeclName();
return nullptr;
}
// C++ [class]p3:
// If a class is marked final and it appears as a base-type-specifier in
// base-clause, the program is ill-formed.
if (FinalAttr *FA = CXXBaseDecl->getAttr<FinalAttr>()) {
Diag(BaseLoc, diag::err_class_marked_final_used_as_base)
<< CXXBaseDecl->getDeclName()
<< FA->isSpelledAsSealed();
Diag(CXXBaseDecl->getLocation(), diag::note_entity_declared_at)
<< CXXBaseDecl->getDeclName() << FA->getRange();
return nullptr;
}
if (BaseDecl->isInvalidDecl())
Class->setInvalidDecl();
// Create the base specifier.
return new (Context) CXXBaseSpecifier(SpecifierRange, Virtual,
Class->getTagKind() == TTK_Class,
Access, TInfo, EllipsisLoc);
}
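// Editor's note: illustrative examples (not part of the original source) of
// ill-formed base specifiers rejected above:
//   union U {};
//   struct D1 : U {};       // error: a union cannot be a base class
//   struct F final {};
//   struct D2 : F {};       // error: base 'F' is marked 'final'
//   struct Fwd;
//   struct D3 : Fwd {};     // error: incomplete base class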
/// ActOnBaseSpecifier - Parsed a base specifier. A base specifier is
/// one entry in the base class list of a class specifier, for
/// example:
/// class foo : public bar, virtual private baz {
/// 'public bar' and 'virtual private baz' are each base-specifiers.
BaseResult
Sema::ActOnBaseSpecifier(Decl *classdecl, SourceRange SpecifierRange,
ParsedAttributes &Attributes,
bool Virtual, AccessSpecifier Access,
ParsedType basetype, SourceLocation BaseLoc,
SourceLocation EllipsisLoc) {
if (!classdecl)
return true;
AdjustDeclIfTemplate(classdecl);
CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(classdecl);
if (!Class)
return true;
// We haven't yet attached the base specifiers.
Class->setIsParsingBaseSpecifiers();
// We do not support any C++11 attributes on base-specifiers yet.
// Diagnose any attributes we see.
if (!Attributes.empty()) {
for (AttributeList *Attr = Attributes.getList(); Attr;
Attr = Attr->getNext()) {
if (Attr->isInvalid() ||
Attr->getKind() == AttributeList::IgnoredAttribute)
continue;
Diag(Attr->getLoc(),
Attr->getKind() == AttributeList::UnknownAttribute
? diag::warn_unknown_attribute_ignored
: diag::err_base_specifier_attribute)
<< Attr->getName();
}
}
TypeSourceInfo *TInfo = nullptr;
GetTypeFromParser(basetype, &TInfo);
if (EllipsisLoc.isInvalid() &&
DiagnoseUnexpandedParameterPack(SpecifierRange.getBegin(), TInfo,
UPPC_BaseType))
return true;
if (CXXBaseSpecifier *BaseSpec = CheckBaseSpecifier(Class, SpecifierRange,
Virtual, Access, TInfo,
EllipsisLoc))
return BaseSpec;
else
Class->setInvalidDecl();
return true;
}
/// Use a small set to collect indirect bases. As this is only used
/// locally, there's no need to abstract the small size parameter.
typedef llvm::SmallPtrSet<QualType, 4> IndirectBaseSet;
/// \brief Recursively add the bases of Type. Don't add Type itself.
static void
NoteIndirectBases(ASTContext &Context, IndirectBaseSet &Set,
const QualType &Type)
{
// Even though the incoming type is a base, it might not be
// a class -- it could be a template parameter, for instance.
if (auto Rec = Type->getAs<RecordType>()) {
auto Decl = Rec->getAsCXXRecordDecl();
// Iterate over its bases.
for (const auto &BaseSpec : Decl->bases()) {
QualType Base = Context.getCanonicalType(BaseSpec.getType())
.getUnqualifiedType();
if (Set.insert(Base).second)
// If we've not already seen it, recurse.
NoteIndirectBases(Context, Set, Base);
}
}
}
/// \brief Performs the actual work of attaching the given base class
/// specifiers to a C++ class.
bool Sema::AttachBaseSpecifiers(CXXRecordDecl *Class,
MutableArrayRef<CXXBaseSpecifier *> Bases) {
if (Bases.empty())
return false;
// Used to keep track of which base types we have already seen, so
// that we can properly diagnose redundant direct base types. Note
// that the key is always the unqualified canonical type of the base
// class.
std::map<QualType, CXXBaseSpecifier*, QualTypeOrdering> KnownBaseTypes;
// Used to track indirect bases so we can see if a direct base is
// ambiguous.
IndirectBaseSet IndirectBaseTypes;
// Copy non-redundant base specifiers into permanent storage.
unsigned NumGoodBases = 0;
bool Invalid = false;
for (unsigned idx = 0; idx < Bases.size(); ++idx) {
QualType NewBaseType
= Context.getCanonicalType(Bases[idx]->getType());
NewBaseType = NewBaseType.getLocalUnqualifiedType();
CXXBaseSpecifier *&KnownBase = KnownBaseTypes[NewBaseType];
if (KnownBase) {
// C++ [class.mi]p3:
// A class shall not be specified as a direct base class of a
// derived class more than once.
Diag(Bases[idx]->getLocStart(),
diag::err_duplicate_base_class)
<< KnownBase->getType()
<< Bases[idx]->getSourceRange();
// Delete the duplicate base class specifier; we're going to
// overwrite its pointer later.
Context.Deallocate(Bases[idx]);
Invalid = true;
} else {
// Okay, add this new base class.
KnownBase = Bases[idx];
Bases[NumGoodBases++] = Bases[idx];
// Note this base's direct & indirect bases, if there could be ambiguity.
if (Bases.size() > 1)
NoteIndirectBases(Context, IndirectBaseTypes, NewBaseType);
if (const RecordType *Record = NewBaseType->getAs<RecordType>()) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(Record->getDecl());
if (Class->isInterface() &&
(!RD->isInterface() ||
KnownBase->getAccessSpecifier() != AS_public)) {
// The Microsoft extension __interface does not permit bases that
// are not themselves public interfaces.
Diag(KnownBase->getLocStart(), diag::err_invalid_base_in_interface)
<< getRecordDiagFromTagKind(RD->getTagKind()) << RD->getName()
<< RD->getSourceRange();
Invalid = true;
}
if (RD->hasAttr<WeakAttr>())
Class->addAttr(WeakAttr::CreateImplicit(Context));
}
}
}
// Attach the remaining base class specifiers to the derived class.
Class->setBases(Bases.data(), NumGoodBases);
for (unsigned idx = 0; idx < NumGoodBases; ++idx) {
// Check whether this direct base is inaccessible due to ambiguity.
QualType BaseType = Bases[idx]->getType();
CanQualType CanonicalBase = Context.getCanonicalType(BaseType)
.getUnqualifiedType();
if (IndirectBaseTypes.count(CanonicalBase)) {
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/true);
bool found
= Class->isDerivedFrom(CanonicalBase->getAsCXXRecordDecl(), Paths);
assert(found);
(void)found;
if (Paths.isAmbiguous(CanonicalBase))
Diag(Bases[idx]->getLocStart(), diag::warn_inaccessible_base_class)
<< BaseType << getAmbiguousPathsDisplayString(Paths)
<< Bases[idx]->getSourceRange();
else
assert(Bases[idx]->isVirtual());
}
// Delete the base class specifier, since its data has been copied
// into the CXXRecordDecl.
Context.Deallocate(Bases[idx]);
}
return Invalid;
}
/// ActOnBaseSpecifiers - Attach the given base specifiers to the
/// class, after checking whether there are any duplicate base
/// classes.
void Sema::ActOnBaseSpecifiers(Decl *ClassDecl,
MutableArrayRef<CXXBaseSpecifier *> Bases) {
if (!ClassDecl || Bases.empty())
return;
AdjustDeclIfTemplate(ClassDecl);
AttachBaseSpecifiers(cast<CXXRecordDecl>(ClassDecl), Bases);
}
/// \brief Determine whether the type \p Derived is a C++ class that is
/// derived from the type \p Base.
bool Sema::IsDerivedFrom(SourceLocation Loc, QualType Derived, QualType Base) {
if (!getLangOpts().CPlusPlus)
return false;
CXXRecordDecl *DerivedRD = Derived->getAsCXXRecordDecl();
if (!DerivedRD)
return false;
CXXRecordDecl *BaseRD = Base->getAsCXXRecordDecl();
if (!BaseRD)
return false;
// If either the base or the derived type is invalid, don't try to
// check whether one is derived from the other.
if (BaseRD->isInvalidDecl() || DerivedRD->isInvalidDecl())
return false;
// FIXME: In a modules build, do we need the entire path to be visible for us
// to be able to use the inheritance relationship?
if (!isCompleteType(Loc, Derived) && !DerivedRD->isBeingDefined())
return false;
return DerivedRD->isDerivedFrom(BaseRD);
}
/// \brief Determine whether the type \p Derived is a C++ class that is
/// derived from the type \p Base.
bool Sema::IsDerivedFrom(SourceLocation Loc, QualType Derived, QualType Base,
CXXBasePaths &Paths) {
if (!getLangOpts().CPlusPlus)
return false;
CXXRecordDecl *DerivedRD = Derived->getAsCXXRecordDecl();
if (!DerivedRD)
return false;
CXXRecordDecl *BaseRD = Base->getAsCXXRecordDecl();
if (!BaseRD)
return false;
if (!isCompleteType(Loc, Derived) && !DerivedRD->isBeingDefined())
return false;
return DerivedRD->isDerivedFrom(BaseRD, Paths);
}
void Sema::BuildBasePathArray(const CXXBasePaths &Paths,
CXXCastPath &BasePathArray) {
assert(BasePathArray.empty() && "Base path array must be empty!");
assert(Paths.isRecordingPaths() && "Must record paths!");
const CXXBasePath &Path = Paths.front();
// We first go backward and check if we have a virtual base.
// FIXME: It would be better if CXXBasePath had the base specifier for
// the nearest virtual base.
unsigned Start = 0;
for (unsigned I = Path.size(); I != 0; --I) {
if (Path[I - 1].Base->isVirtual()) {
Start = I - 1;
break;
}
}
// Now add all bases.
for (unsigned I = Start, E = Path.size(); I != E; ++I)
BasePathArray.push_back(const_cast<CXXBaseSpecifier*>(Path[I].Base));
}
/// CheckDerivedToBaseConversion - Check whether the Derived-to-Base
/// conversion (where Derived and Base are class types) is
/// well-formed, meaning that the conversion is unambiguous (and
/// that all of the base classes are accessible). Returns true
/// and emits a diagnostic if the code is ill-formed, returns false
/// otherwise. Loc is the location where this routine should point to
/// if there is an error, and Range is the source range to highlight
/// if there is an error.
///
/// If either InaccessibleBaseID or AmbigiousBaseConvID is zero, then the
/// diagnostic for the respective type of error will be suppressed, but the
/// check for ill-formed code will still be performed.
bool
Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base,
unsigned InaccessibleBaseID,
unsigned AmbigiousBaseConvID,
SourceLocation Loc, SourceRange Range,
DeclarationName Name,
CXXCastPath *BasePath,
bool IgnoreAccess) {
// First, determine whether the path from Derived to Base is
// ambiguous. This is slightly more expensive than checking whether
// the Derived to Base conversion exists, because here we need to
// explore multiple paths to determine if there is an ambiguity.
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
bool DerivationOkay = IsDerivedFrom(Loc, Derived, Base, Paths);
assert(DerivationOkay &&
"Can only be used with a derived-to-base conversion");
(void)DerivationOkay;
if (!Paths.isAmbiguous(Context.getCanonicalType(Base).getUnqualifiedType())) {
if (!IgnoreAccess) {
// Check that the base class can be accessed.
switch (CheckBaseClassAccess(Loc, Base, Derived, Paths.front(),
InaccessibleBaseID)) {
case AR_inaccessible:
return true;
case AR_accessible:
case AR_dependent:
case AR_delayed:
break;
}
}
// Build a base path if necessary.
if (BasePath)
BuildBasePathArray(Paths, *BasePath);
return false;
}
if (AmbigiousBaseConvID) {
// We know that the derived-to-base conversion is ambiguous, and
// we're going to produce a diagnostic. Perform the derived-to-base
// search just one more time to compute all of the possible paths so
// that we can print them out. This is more expensive than any of
// the previous derived-to-base checks we've done, but at this point
// performance isn't as much of an issue.
Paths.clear();
Paths.setRecordingPaths(true);
bool StillOkay = IsDerivedFrom(Loc, Derived, Base, Paths);
assert(StillOkay && "Can only be used with a derived-to-base conversion");
(void)StillOkay;
// Build up a textual representation of the ambiguous paths, e.g.,
// D -> B -> A, that will be used to illustrate the ambiguous
// conversions in the diagnostic. We only print one of the paths
// to each base class subobject.
std::string PathDisplayStr = getAmbiguousPathsDisplayString(Paths);
Diag(Loc, AmbigiousBaseConvID)
<< Derived << Base << PathDisplayStr << Range << Name;
}
return true;
}
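// Editor's note: an illustrative example (not part of the original source) of
// an ambiguous derived-to-base conversion diagnosed above:
//   struct A {};
//   struct B : A {};
//   struct C : A {};
//   struct D : B, C {};
//   A *p = new D;           // error: ambiguous; D -> B -> A and D -> C -> A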
bool
Sema::CheckDerivedToBaseConversion(QualType Derived, QualType Base,
SourceLocation Loc, SourceRange Range,
CXXCastPath *BasePath,
bool IgnoreAccess) {
return CheckDerivedToBaseConversion(
Derived, Base, diag::err_upcast_to_inaccessible_base,
diag::err_ambiguous_derived_to_base_conv, Loc, Range, DeclarationName(),
BasePath, IgnoreAccess);
}
/// @brief Builds a string representing ambiguous paths from a
/// specific derived class to different subobjects of the same base
/// class.
///
/// This function builds a string that can be used in error messages
/// to show the different paths that one can take through the
/// inheritance hierarchy to go from the derived class to different
/// subobjects of a base class. The result looks something like this:
/// @code
/// struct D -> struct B -> struct A
/// struct D -> struct C -> struct A
/// @endcode
std::string Sema::getAmbiguousPathsDisplayString(CXXBasePaths &Paths) {
std::string PathDisplayStr;
std::set<unsigned> DisplayedPaths;
for (CXXBasePaths::paths_iterator Path = Paths.begin();
Path != Paths.end(); ++Path) {
if (DisplayedPaths.insert(Path->back().SubobjectNumber).second) {
// We haven't displayed a path to this particular base
// class subobject yet.
PathDisplayStr += "\n ";
PathDisplayStr += Context.getTypeDeclType(Paths.getOrigin()).getAsString();
for (CXXBasePath::const_iterator Element = Path->begin();
Element != Path->end(); ++Element)
PathDisplayStr += " -> " + Element->Base->getType().getAsString();
}
}
return PathDisplayStr;
}
//===----------------------------------------------------------------------===//
// C++ class member Handling
//===----------------------------------------------------------------------===//
/// ActOnAccessSpecifier - Parsed an access specifier followed by a colon.
bool Sema::ActOnAccessSpecifier(AccessSpecifier Access,
SourceLocation ASLoc,
SourceLocation ColonLoc,
AttributeList *Attrs) {
assert(Access != AS_none && "Invalid kind for syntactic access specifier!");
AccessSpecDecl *ASDecl = AccessSpecDecl::Create(Context, Access, CurContext,
ASLoc, ColonLoc);
CurContext->addHiddenDecl(ASDecl);
return ProcessAccessDeclAttributeList(ASDecl, Attrs);
}
/// CheckOverrideControl - Check C++11 override control semantics.
void Sema::CheckOverrideControl(NamedDecl *D) {
if (D->isInvalidDecl())
return;
// We only care about "override" and "final" declarations.
if (!D->hasAttr<OverrideAttr>() && !D->hasAttr<FinalAttr>())
return;
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D);
// We can't check dependent instance methods.
if (MD && MD->isInstance() &&
(MD->getParent()->hasAnyDependentBases() ||
MD->getType()->isDependentType()))
return;
if (MD && !MD->isVirtual()) {
// If we have a non-virtual method, check if it hides a virtual method.
// (In that case, it's most likely the method has the wrong type.)
SmallVector<CXXMethodDecl *, 8> OverloadedMethods;
FindHiddenVirtualMethods(MD, OverloadedMethods);
if (!OverloadedMethods.empty()) {
if (OverrideAttr *OA = D->getAttr<OverrideAttr>()) {
Diag(OA->getLocation(),
diag::override_keyword_hides_virtual_member_function)
<< "override" << (OverloadedMethods.size() > 1);
} else if (FinalAttr *FA = D->getAttr<FinalAttr>()) {
Diag(FA->getLocation(),
diag::override_keyword_hides_virtual_member_function)
<< (FA->isSpelledAsSealed() ? "sealed" : "final")
<< (OverloadedMethods.size() > 1);
}
NoteHiddenVirtualMethods(MD, OverloadedMethods);
MD->setInvalidDecl();
return;
}
// Fall through into the general case diagnostic.
// FIXME: We might want to attempt typo correction here.
}
if (!MD || !MD->isVirtual()) {
if (OverrideAttr *OA = D->getAttr<OverrideAttr>()) {
Diag(OA->getLocation(),
diag::override_keyword_only_allowed_on_virtual_member_functions)
<< "override" << FixItHint::CreateRemoval(OA->getLocation());
D->dropAttr<OverrideAttr>();
}
if (FinalAttr *FA = D->getAttr<FinalAttr>()) {
Diag(FA->getLocation(),
diag::override_keyword_only_allowed_on_virtual_member_functions)
<< (FA->isSpelledAsSealed() ? "sealed" : "final")
<< FixItHint::CreateRemoval(FA->getLocation());
D->dropAttr<FinalAttr>();
}
return;
}
// C++11 [class.virtual]p5:
// If a function is marked with the virt-specifier override and
// does not override a member function of a base class, the program is
// ill-formed.
bool HasOverriddenMethods =
MD->begin_overridden_methods() != MD->end_overridden_methods();
if (MD->hasAttr<OverrideAttr>() && !HasOverriddenMethods)
Diag(MD->getLocation(), diag::err_function_marked_override_not_overriding)
<< MD->getDeclName();
}
void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D) {
if (D->isInvalidDecl() || D->hasAttr<OverrideAttr>())
return;
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D);
if (!MD || MD->isImplicit() || MD->hasAttr<FinalAttr>())
return;
SourceLocation Loc = MD->getLocation();
SourceLocation SpellingLoc = Loc;
if (getSourceManager().isMacroArgExpansion(Loc))
SpellingLoc = getSourceManager().getImmediateExpansionRange(Loc).first;
SpellingLoc = getSourceManager().getSpellingLoc(SpellingLoc);
if (SpellingLoc.isValid() && getSourceManager().isInSystemHeader(SpellingLoc))
return;
if (MD->size_overridden_methods() > 0) {
unsigned DiagID = isa<CXXDestructorDecl>(MD)
? diag::warn_destructor_marked_not_override_overriding
: diag::warn_function_marked_not_override_overriding;
Diag(MD->getLocation(), DiagID) << MD->getDeclName();
const CXXMethodDecl *OMD = *MD->begin_overridden_methods();
Diag(OMD->getLocation(), diag::note_overridden_virtual_function);
}
}
/// CheckIfOverriddenFunctionIsMarkedFinal - Checks whether a virtual member
/// function overrides a virtual member function marked 'final', according to
/// C++11 [class.virtual]p4.
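/// For example (illustrative, names hypothetical):
/// @code
/// struct A { virtual void f() final; };
/// struct B : A { void f(); }; // error: declaration of 'f' overrides a
///                             // 'final' function
/// @endcode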
bool Sema::CheckIfOverriddenFunctionIsMarkedFinal(const CXXMethodDecl *New,
const CXXMethodDecl *Old) {
FinalAttr *FA = Old->getAttr<FinalAttr>();
if (!FA)
return false;
Diag(New->getLocation(), diag::err_final_function_overridden)
<< New->getDeclName()
<< FA->isSpelledAsSealed();
Diag(Old->getLocation(), diag::note_overridden_virtual_function);
return true;
}
static bool InitializationHasSideEffects(const FieldDecl &FD) {
const Type *T = FD.getType()->getBaseElementTypeUnsafe();
// FIXME: Destruction of ObjC lifetime types has side-effects.
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
return !RD->isCompleteDefinition() ||
!RD->hasTrivialDefaultConstructor() ||
!RD->hasTrivialDestructor();
return false;
}
static AttributeList *getMSPropertyAttr(AttributeList *list) {
for (AttributeList *it = list; it != nullptr; it = it->getNext())
if (it->isDeclspecPropertyAttribute())
return it;
return nullptr;
}
// Check whether a field shadows a field inherited from a base class.
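// For example (illustrative, names hypothetical):
//   struct Base { int x; };
//   struct Derived : Base {
//     int x; // warning: 'x' shadows the field inherited from 'Base'
//   };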
void Sema::CheckShadowInheritedFields(const SourceLocation &Loc,
DeclarationName FieldName,
const CXXRecordDecl *RD) {
if (Diags.isIgnored(diag::warn_shadow_field, Loc))
return;
// Maps each base class to the shadowed field found in it.
std::map<CXXRecordDecl*, NamedDecl*> Bases;
auto FieldShadowed = [&](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) {
const auto Base = Specifier->getType()->getAsCXXRecordDecl();
// Record an ambiguous path directly
if (Bases.find(Base) != Bases.end())
return true;
for (const auto Field : Base->lookup(FieldName)) {
if ((isa<FieldDecl>(Field) || isa<IndirectFieldDecl>(Field)) &&
Field->getAccess() != AS_private) {
assert(Field->getAccess() != AS_none);
assert(Bases.find(Base) == Bases.end());
Bases[Base] = Field;
return true;
}
}
return false;
};
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/true);
if (!RD->lookupInBases(FieldShadowed, Paths))
return;
for (const auto &P : Paths) {
auto Base = P.back().Base->getType()->getAsCXXRecordDecl();
auto It = Bases.find(Base);
// Skip duplicated bases
if (It == Bases.end())
continue;
auto BaseField = It->second;
assert(BaseField->getAccess() != AS_private);
if (AS_none !=
CXXRecordDecl::MergeAccess(P.Access, BaseField->getAccess())) {
Diag(Loc, diag::warn_shadow_field)
<< FieldName.getAsString() << RD->getName() << Base->getName();
Diag(BaseField->getLocation(), diag::note_shadow_field);
Bases.erase(It);
}
}
}
/// ActOnCXXMemberDeclarator - This is invoked when a C++ class member
/// declarator is parsed. 'AS' is the access specifier, 'BW' specifies the
/// bitfield width if there is one, 'InitExpr' specifies the initializer if
/// one has been parsed, and 'InitStyle' is set if an in-class initializer is
/// present (but parsing it has been deferred).
NamedDecl *
Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D,
MultiTemplateParamsArg TemplateParameterLists,
Expr *BW, const VirtSpecifiers &VS,
InClassInitStyle InitStyle) {
const DeclSpec &DS = D.getDeclSpec();
DeclarationNameInfo NameInfo = GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
SourceLocation Loc = NameInfo.getLoc();
// For anonymous bitfields, the location should point to the type.
if (Loc.isInvalid())
Loc = D.getLocStart();
Expr *BitWidth = static_cast<Expr*>(BW);
assert(isa<CXXRecordDecl>(CurContext));
assert(!DS.isFriendSpecified());
bool isFunc = D.isDeclarationOfFunction();
if (cast<CXXRecordDecl>(CurContext)->isInterface()) {
// The Microsoft extension __interface only permits public member functions
// and prohibits constructors, destructors, operators, non-public member
// functions, static methods and data members.
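// For example (an illustrative sketch of the extension):
//   __interface I {
//     void f();        // OK: public member function
//     static void g(); // error: static method in an __interface
//   };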
unsigned InvalidDecl;
bool ShowDeclName = true;
if (!isFunc)
InvalidDecl = (DS.getStorageClassSpec() == DeclSpec::SCS_typedef) ? 0 : 1;
else if (AS != AS_public)
InvalidDecl = 2;
else if (DS.getStorageClassSpec() == DeclSpec::SCS_static)
InvalidDecl = 3;
else switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
InvalidDecl = 4;
ShowDeclName = false;
break;
case DeclarationName::CXXDestructorName:
InvalidDecl = 5;
ShowDeclName = false;
break;
case DeclarationName::CXXOperatorName:
case DeclarationName::CXXConversionFunctionName:
InvalidDecl = 6;
break;
default:
InvalidDecl = 0;
break;
}
if (InvalidDecl) {
if (ShowDeclName)
Diag(Loc, diag::err_invalid_member_in_interface)
<< (InvalidDecl-1) << Name;
else
Diag(Loc, diag::err_invalid_member_in_interface)
<< (InvalidDecl-1) << "";
return nullptr;
}
}
// C++ 9.2p6: A member shall not be declared to have automatic storage
// duration (auto, register) or with the extern storage-class-specifier.
// C++ 7.1.1p8: The mutable specifier can be applied only to names of class
// data members and cannot be applied to names declared const or static,
// and cannot be applied to reference members.
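// For example (illustrative):
//   struct S {
//     extern int a;     // error: invalid storage class for a class member
//     mutable void f(); // error: 'mutable' cannot be applied to functions
//   };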
switch (DS.getStorageClassSpec()) {
case DeclSpec::SCS_unspecified:
case DeclSpec::SCS_typedef:
case DeclSpec::SCS_static:
break;
case DeclSpec::SCS_mutable:
if (isFunc) {
Diag(DS.getStorageClassSpecLoc(), diag::err_mutable_function);
// FIXME: It would be nicer if the keyword were ignored only for this
// declarator. Otherwise we could get follow-up errors.
D.getMutableDeclSpec().ClearStorageClassSpecs();
}
break;
default:
Diag(DS.getStorageClassSpecLoc(),
diag::err_storageclass_invalid_for_member);
D.getMutableDeclSpec().ClearStorageClassSpecs();
break;
}
bool isInstField = ((DS.getStorageClassSpec() == DeclSpec::SCS_unspecified ||
DS.getStorageClassSpec() == DeclSpec::SCS_mutable) &&
!isFunc);
if (DS.isConstexprSpecified() && isInstField) {
SemaDiagnosticBuilder B =
Diag(DS.getConstexprSpecLoc(), diag::err_invalid_constexpr_member);
SourceLocation ConstexprLoc = DS.getConstexprSpecLoc();
if (InitStyle == ICIS_NoInit) {
B << 0 << 0;
if (D.getDeclSpec().getTypeQualifiers() & DeclSpec::TQ_const)
B << FixItHint::CreateRemoval(ConstexprLoc);
else {
B << FixItHint::CreateReplacement(ConstexprLoc, "const");
D.getMutableDeclSpec().ClearConstexprSpec();
const char *PrevSpec;
unsigned DiagID;
bool Failed = D.getMutableDeclSpec().SetTypeQual(
DeclSpec::TQ_const, ConstexprLoc, PrevSpec, DiagID, getLangOpts());
(void)Failed;
assert(!Failed && "Making a constexpr member const shouldn't fail");
}
} else {
B << 1;
const char *PrevSpec;
unsigned DiagID;
if (D.getMutableDeclSpec().SetStorageClassSpec(
*this, DeclSpec::SCS_static, ConstexprLoc, PrevSpec, DiagID,
Context.getPrintingPolicy())) {
assert(DS.getStorageClassSpec() == DeclSpec::SCS_mutable &&
"This is the only DeclSpec that should fail to be applied");
B << 1;
} else {
B << 0 << FixItHint::CreateInsertion(ConstexprLoc, "static ");
isInstField = false;
}
}
}
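// For example (illustrative): 'struct S { constexpr int x = 5; };' gets a
// fix-it suggesting 'static constexpr int x = 5;', while a constexpr member
// declared without an initializer gets a fix-it turning 'constexpr' into
// 'const'.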
NamedDecl *Member;
if (isInstField) {
CXXScopeSpec &SS = D.getCXXScopeSpec();
// Data members must have identifiers for names.
if (!Name.isIdentifier()) {
Diag(Loc, diag::err_bad_variable_name)
<< Name;
return nullptr;
}
IdentifierInfo *II = Name.getAsIdentifierInfo();
// A member field cannot be declared with the "template" keyword,
// so TemplateParameterLists should be empty in this case.
if (TemplateParameterLists.size()) {
TemplateParameterList* TemplateParams = TemplateParameterLists[0];
if (TemplateParams->size()) {
// There is no such thing as a member field template.
Diag(D.getIdentifierLoc(), diag::err_template_member)
<< II
<< SourceRange(TemplateParams->getTemplateLoc(),
TemplateParams->getRAngleLoc());
} else {
// There is an extraneous 'template<>' for this member.
Diag(TemplateParams->getTemplateLoc(),
diag::err_template_member_noparams)
<< II
<< SourceRange(TemplateParams->getTemplateLoc(),
TemplateParams->getRAngleLoc());
}
return nullptr;
}
if (SS.isSet() && !SS.isInvalid()) {
// The user provided a superfluous scope specifier inside a class
// definition:
//
// class X {
// int X::member;
// };
if (DeclContext *DC = computeDeclContext(SS, false))
diagnoseQualifiedDeclaration(SS, DC, Name, D.getIdentifierLoc());
else
Diag(D.getIdentifierLoc(), diag::err_member_qualification)
<< Name << SS.getRange();
SS.clear();
}
AttributeList *MSPropertyAttr =
getMSPropertyAttr(D.getDeclSpec().getAttributes().getList());
if (MSPropertyAttr) {
Member = HandleMSProperty(S, cast<CXXRecordDecl>(CurContext), Loc, D,
BitWidth, InitStyle, AS, MSPropertyAttr);
if (!Member)
return nullptr;
isInstField = false;
} else {
Member = HandleField(S, cast<CXXRecordDecl>(CurContext), Loc, D,
BitWidth, InitStyle, AS);
if (!Member)
return nullptr;
}
CheckShadowInheritedFields(Loc, Name, cast<CXXRecordDecl>(CurContext));
} else {
Member = HandleDeclarator(S, D, TemplateParameterLists);
if (!Member)
return nullptr;
// Non-instance fields can't have a bit-field.
if (BitWidth) {
if (Member->isInvalidDecl()) {
// don't emit another diagnostic.
} else if (isa<VarDecl>(Member) || isa<VarTemplateDecl>(Member)) {
// C++ 9.6p3: A bit-field shall not be a static member.
// "static member 'A' cannot be a bit-field"
Diag(Loc, diag::err_static_not_bitfield)
<< Name << BitWidth->getSourceRange();
} else if (isa<TypedefDecl>(Member)) {
// "typedef member 'x' cannot be a bit-field"
Diag(Loc, diag::err_typedef_not_bitfield)
<< Name << BitWidth->getSourceRange();
} else {
// A function typedef ("typedef int f(); f a;").
// C++ 9.6p3: A bit-field shall have integral or enumeration type.
Diag(Loc, diag::err_not_integral_type_bitfield)
<< Name << cast<ValueDecl>(Member)->getType()
<< BitWidth->getSourceRange();
}
BitWidth = nullptr;
Member->setInvalidDecl();
}
Member->setAccess(AS);
// If we have declared a member function template or static data member
// template, set the access of the templated declaration as well.
if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(Member))
FunTmpl->getTemplatedDecl()->setAccess(AS);
else if (VarTemplateDecl *VarTmpl = dyn_cast<VarTemplateDecl>(Member))
VarTmpl->getTemplatedDecl()->setAccess(AS);
}
if (VS.isOverrideSpecified())
Member->addAttr(new (Context) OverrideAttr(VS.getOverrideLoc(), Context, 0));
if (VS.isFinalSpecified())
Member->addAttr(new (Context) FinalAttr(VS.getFinalLoc(), Context,
VS.isFinalSpelledSealed()));
if (VS.getLastLocation().isValid()) {
// Update the end location of a method that has a virt-specifiers.
if (CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(Member))
MD->setRangeEnd(VS.getLastLocation());
}
CheckOverrideControl(Member);
assert((Name || isInstField) && "No identifier for non-field ?");
if (isInstField) {
FieldDecl *FD = cast<FieldDecl>(Member);
FieldCollector->Add(FD);
if (!Diags.isIgnored(diag::warn_unused_private_field, FD->getLocation())) {
// Remember all explicit private FieldDecls that have a name, no side
// effects and are not part of a dependent type declaration.
if (!FD->isImplicit() && FD->getDeclName() &&
FD->getAccess() == AS_private &&
!FD->hasAttr<UnusedAttr>() &&
!FD->getParent()->isDependentContext() &&
!InitializationHasSideEffects(*FD))
UnusedPrivateFields.insert(FD);
}
}
return Member;
}
namespace {
class UninitializedFieldVisitor
: public EvaluatedExprVisitor<UninitializedFieldVisitor> {
Sema &S;
// List of Decls to generate a warning on. Decls are removed from the
// set as they become initialized.
llvm::SmallPtrSetImpl<ValueDecl*> &Decls;
// List of base classes of the record. Classes are removed once their
// initializers have been processed.
llvm::SmallPtrSetImpl<QualType> &BaseClasses;
// Vector of decls to be removed from the Decl set prior to visiting the
// nodes. These Decls may have been initialized in the prior initializer.
llvm::SmallVector<ValueDecl*, 4> DeclsToRemove;
// If non-null, add a note to the warning pointing back to the constructor.
const CXXConstructorDecl *Constructor;
// Variables to hold state when processing an initializer list. When
// InitList is true, special case initialization of FieldDecls matching
// InitListFieldDecl.
bool InitList;
FieldDecl *InitListFieldDecl;
llvm::SmallVector<unsigned, 4> InitFieldIndex;
public:
typedef EvaluatedExprVisitor<UninitializedFieldVisitor> Inherited;
UninitializedFieldVisitor(Sema &S,
llvm::SmallPtrSetImpl<ValueDecl*> &Decls,
llvm::SmallPtrSetImpl<QualType> &BaseClasses)
: Inherited(S.Context), S(S), Decls(Decls), BaseClasses(BaseClasses),
Constructor(nullptr), InitList(false), InitListFieldDecl(nullptr) {}
// Returns true if the use of ME is not an uninitialized use.
bool IsInitListMemberExprInitialized(MemberExpr *ME,
bool CheckReferenceOnly) {
llvm::SmallVector<FieldDecl*, 4> Fields;
bool ReferenceField = false;
while (ME) {
FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
if (!FD)
return false;
Fields.push_back(FD);
if (FD->getType()->isReferenceType())
ReferenceField = true;
ME = dyn_cast<MemberExpr>(ME->getBase()->IgnoreParenImpCasts());
}
// Binding a reference to an uninitialized field is not an
// uninitialized use.
if (CheckReferenceOnly && !ReferenceField)
return true;
llvm::SmallVector<unsigned, 4> UsedFieldIndex;
// Discard the first field since it is the field decl that is being
// initialized.
for (auto I = Fields.rbegin() + 1, E = Fields.rend(); I != E; ++I) {
UsedFieldIndex.push_back((*I)->getFieldIndex());
}
for (auto UsedIter = UsedFieldIndex.begin(),
UsedEnd = UsedFieldIndex.end(),
OrigIter = InitFieldIndex.begin(),
OrigEnd = InitFieldIndex.end();
UsedIter != UsedEnd && OrigIter != OrigEnd; ++UsedIter, ++OrigIter) {
if (*UsedIter < *OrigIter)
return true;
if (*UsedIter > *OrigIter)
break;
}
return false;
}
void HandleMemberExpr(MemberExpr *ME, bool CheckReferenceOnly,
bool AddressOf) {
if (isa<EnumConstantDecl>(ME->getMemberDecl()))
return;
// FieldME is the inner-most MemberExpr that is not an anonymous struct
// or union.
MemberExpr *FieldME = ME;
bool AllPODFields = FieldME->getType().isPODType(S.Context);
Expr *Base = ME;
while (MemberExpr *SubME =
dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
if (isa<VarDecl>(SubME->getMemberDecl()))
return;
if (FieldDecl *FD = dyn_cast<FieldDecl>(SubME->getMemberDecl()))
if (!FD->isAnonymousStructOrUnion())
FieldME = SubME;
if (!FieldME->getType().isPODType(S.Context))
AllPODFields = false;
Base = SubME->getBase();
}
if (!isa<CXXThisExpr>(Base->IgnoreParenImpCasts()))
return;
if (AddressOf && AllPODFields)
return;
ValueDecl* FoundVD = FieldME->getMemberDecl();
if (ImplicitCastExpr *BaseCast = dyn_cast<ImplicitCastExpr>(Base)) {
while (isa<ImplicitCastExpr>(BaseCast->getSubExpr())) {
BaseCast = cast<ImplicitCastExpr>(BaseCast->getSubExpr());
}
if (BaseCast->getCastKind() == CK_UncheckedDerivedToBase) {
QualType T = BaseCast->getType();
if (T->isPointerType() &&
BaseClasses.count(T->getPointeeType())) {
S.Diag(FieldME->getExprLoc(), diag::warn_base_class_is_uninit)
<< T->getPointeeType() << FoundVD;
}
}
}
if (!Decls.count(FoundVD))
return;
const bool IsReference = FoundVD->getType()->isReferenceType();
if (InitList && !AddressOf && FoundVD == InitListFieldDecl) {
// Special checking for initializer lists.
if (IsInitListMemberExprInitialized(ME, CheckReferenceOnly)) {
return;
}
} else {
// Prevent double warnings on use of unbound references.
if (CheckReferenceOnly && !IsReference)
return;
}
unsigned diag = IsReference
? diag::warn_reference_field_is_uninit
: diag::warn_field_is_uninit;
S.Diag(FieldME->getExprLoc(), diag) << FoundVD;
if (Constructor)
S.Diag(Constructor->getLocation(),
diag::note_uninit_in_this_constructor)
<< (Constructor->isDefaultConstructor() && Constructor->isImplicit());
}
void HandleValue(Expr *E, bool AddressOf) {
E = E->IgnoreParens();
if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
HandleMemberExpr(ME, false /*CheckReferenceOnly*/,
AddressOf /*AddressOf*/);
return;
}
if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
Visit(CO->getCond());
HandleValue(CO->getTrueExpr(), AddressOf);
HandleValue(CO->getFalseExpr(), AddressOf);
return;
}
if (BinaryConditionalOperator *BCO =
dyn_cast<BinaryConditionalOperator>(E)) {
Visit(BCO->getCond());
HandleValue(BCO->getFalseExpr(), AddressOf);
return;
}
if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E)) {
HandleValue(OVE->getSourceExpr(), AddressOf);
return;
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
switch (BO->getOpcode()) {
default:
break;
case(BO_PtrMemD):
case(BO_PtrMemI):
HandleValue(BO->getLHS(), AddressOf);
Visit(BO->getRHS());
return;
case(BO_Comma):
Visit(BO->getLHS());
HandleValue(BO->getRHS(), AddressOf);
return;
}
}
Visit(E);
}
void CheckInitListExpr(InitListExpr *ILE) {
InitFieldIndex.push_back(0);
for (auto Child : ILE->children()) {
if (InitListExpr *SubList = dyn_cast<InitListExpr>(Child)) {
CheckInitListExpr(SubList);
} else {
Visit(Child);
}
++InitFieldIndex.back();
}
InitFieldIndex.pop_back();
}
void CheckInitializer(Expr *E, const CXXConstructorDecl *FieldConstructor,
FieldDecl *Field, const Type *BaseClass) {
// Remove Decls that may have been initialized in the previous
// initializer.
for (ValueDecl* VD : DeclsToRemove)
Decls.erase(VD);
DeclsToRemove.clear();
Constructor = FieldConstructor;
InitListExpr *ILE = dyn_cast<InitListExpr>(E);
if (ILE && Field) {
InitList = true;
InitListFieldDecl = Field;
InitFieldIndex.clear();
CheckInitListExpr(ILE);
} else {
InitList = false;
Visit(E);
}
if (Field)
Decls.erase(Field);
if (BaseClass)
BaseClasses.erase(BaseClass->getCanonicalTypeInternal());
}
void VisitMemberExpr(MemberExpr *ME) {
// All uses of unbound reference fields will warn.
HandleMemberExpr(ME, true /*CheckReferenceOnly*/, false /*AddressOf*/);
}
void VisitImplicitCastExpr(ImplicitCastExpr *E) {
if (E->getCastKind() == CK_LValueToRValue) {
HandleValue(E->getSubExpr(), false /*AddressOf*/);
return;
}
Inherited::VisitImplicitCastExpr(E);
}
void VisitCXXConstructExpr(CXXConstructExpr *E) {
if (E->getConstructor()->isCopyConstructor()) {
Expr *ArgExpr = E->getArg(0);
if (InitListExpr *ILE = dyn_cast<InitListExpr>(ArgExpr))
if (ILE->getNumInits() == 1)
ArgExpr = ILE->getInit(0);
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(ArgExpr))
if (ICE->getCastKind() == CK_NoOp)
ArgExpr = ICE->getSubExpr();
HandleValue(ArgExpr, false /*AddressOf*/);
return;
}
Inherited::VisitCXXConstructExpr(E);
}
void VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
Expr *Callee = E->getCallee();
if (isa<MemberExpr>(Callee)) {
HandleValue(Callee, false /*AddressOf*/);
for (auto Arg : E->arguments())
Visit(Arg);
return;
}
Inherited::VisitCXXMemberCallExpr(E);
}
void VisitCallExpr(CallExpr *E) {
// Treat std::move as a use.
if (E->getNumArgs() == 1) {
if (FunctionDecl *FD = E->getDirectCallee()) {
if (FD->isInStdNamespace() && FD->getIdentifier() &&
FD->getIdentifier()->isStr("move")) {
HandleValue(E->getArg(0), false /*AddressOf*/);
return;
}
}
}
Inherited::VisitCallExpr(E);
}
void VisitCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
Expr *Callee = E->getCallee();
if (isa<UnresolvedLookupExpr>(Callee))
return Inherited::VisitCXXOperatorCallExpr(E);
Visit(Callee);
for (auto Arg : E->arguments())
HandleValue(Arg->IgnoreParenImpCasts(), false /*AddressOf*/);
}
void VisitBinaryOperator(BinaryOperator *E) {
// If a field assignment is detected, remove the field from the
// uninitialized field set.
if (E->getOpcode() == BO_Assign)
if (MemberExpr *ME = dyn_cast<MemberExpr>(E->getLHS()))
if (FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
if (!FD->getType()->isReferenceType())
DeclsToRemove.push_back(FD);
if (E->isCompoundAssignmentOp()) {
HandleValue(E->getLHS(), false /*AddressOf*/);
Visit(E->getRHS());
return;
}
Inherited::VisitBinaryOperator(E);
}
void VisitUnaryOperator(UnaryOperator *E) {
if (E->isIncrementDecrementOp()) {
HandleValue(E->getSubExpr(), false /*AddressOf*/);
return;
}
if (E->getOpcode() == UO_AddrOf) {
if (MemberExpr *ME = dyn_cast<MemberExpr>(E->getSubExpr())) {
HandleValue(ME->getBase(), true /*AddressOf*/);
return;
}
}
Inherited::VisitUnaryOperator(E);
}
};
// Diagnose value-uses of fields to initialize themselves, e.g.
// foo(foo)
// where foo is not also a parameter to the constructor.
// Also diagnose cross-field uninitialized uses such as
// x(y), y(x)
// TODO: implement -Wuninitialized and fold this into that framework.
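// For example (illustrative, names hypothetical):
//   struct S {
//     int x, y;
//     S() : x(y), y(1) {} // warning: field 'y' is uninitialized when used
//   };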
static void DiagnoseUninitializedFields(
Sema &SemaRef, const CXXConstructorDecl *Constructor) {
if (SemaRef.getDiagnostics().isIgnored(diag::warn_field_is_uninit,
Constructor->getLocation())) {
return;
}
if (Constructor->isInvalidDecl())
return;
const CXXRecordDecl *RD = Constructor->getParent();
if (RD->getDescribedClassTemplate())
return;
// Holds fields that are uninitialized.
llvm::SmallPtrSet<ValueDecl*, 4> UninitializedFields;
// At the beginning, all fields are uninitialized.
for (auto *I : RD->decls()) {
if (auto *FD = dyn_cast<FieldDecl>(I)) {
UninitializedFields.insert(FD);
} else if (auto *IFD = dyn_cast<IndirectFieldDecl>(I)) {
UninitializedFields.insert(IFD->getAnonField());
}
}
llvm::SmallPtrSet<QualType, 4> UninitializedBaseClasses;
for (auto I : RD->bases())
UninitializedBaseClasses.insert(I.getType().getCanonicalType());
if (UninitializedFields.empty() && UninitializedBaseClasses.empty())
return;
UninitializedFieldVisitor UninitializedChecker(SemaRef,
UninitializedFields,
UninitializedBaseClasses);
for (const auto *FieldInit : Constructor->inits()) {
if (UninitializedFields.empty() && UninitializedBaseClasses.empty())
break;
Expr *InitExpr = FieldInit->getInit();
if (!InitExpr)
continue;
if (CXXDefaultInitExpr *Default =
dyn_cast<CXXDefaultInitExpr>(InitExpr)) {
InitExpr = Default->getExpr();
if (!InitExpr)
continue;
// In-class initializers will point to the constructor.
UninitializedChecker.CheckInitializer(InitExpr, Constructor,
FieldInit->getAnyMember(),
FieldInit->getBaseClass());
} else {
UninitializedChecker.CheckInitializer(InitExpr, nullptr,
FieldInit->getAnyMember(),
FieldInit->getBaseClass());
}
}
}
} // namespace
/// \brief Enter a new C++ default initializer scope. After calling this, the
/// caller must call \ref ActOnFinishCXXInClassMemberInitializer, even if
/// parsing or instantiating the initializer failed.
void Sema::ActOnStartCXXInClassMemberInitializer() {
// Create a synthetic function scope to represent the call to the constructor
// that notionally surrounds a use of this initializer.
PushFunctionScope();
}
/// \brief This is invoked after parsing an in-class initializer for a
/// non-static C++ class member, and after instantiating an in-class initializer
/// in a class template. Such actions are deferred until the class is complete.
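/// For example (illustrative): for 'struct S { int x = f(); };' this is
/// invoked with the initializer 'f()' once 'S' is complete.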
void Sema::ActOnFinishCXXInClassMemberInitializer(Decl *D,
SourceLocation InitLoc,
Expr *InitExpr) {
// Pop the notional constructor scope we created earlier.
PopFunctionScopeInfo(nullptr, D);
FieldDecl *FD = dyn_cast<FieldDecl>(D);
assert((isa<MSPropertyDecl>(D) || FD->getInClassInitStyle() != ICIS_NoInit) &&
"must set init style when field is created");
if (!InitExpr) {
D->setInvalidDecl();
if (FD)
FD->removeInClassInitializer();
return;
}
if (DiagnoseUnexpandedParameterPack(InitExpr, UPPC_Initializer)) {
FD->setInvalidDecl();
FD->removeInClassInitializer();
return;
}
ExprResult Init = InitExpr;
if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) {
InitializedEntity Entity = InitializedEntity::InitializeMember(FD);
InitializationKind Kind = FD->getInClassInitStyle() == ICIS_ListInit
? InitializationKind::CreateDirectList(InitExpr->getLocStart())
: InitializationKind::CreateCopy(InitExpr->getLocStart(), InitLoc);
InitializationSequence Seq(*this, Entity, Kind, InitExpr);
Init = Seq.Perform(*this, Entity, Kind, InitExpr);
if (Init.isInvalid()) {
FD->setInvalidDecl();
return;
}
}
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
Init = ActOnFinishFullExpr(Init.get(), InitLoc);
if (Init.isInvalid()) {
FD->setInvalidDecl();
return;
}
InitExpr = Init.get();
FD->setInClassInitializer(InitExpr);
}
/// \brief Find the direct and/or virtual base specifiers that
/// correspond to the given base type, for use in base initialization
/// within a constructor.
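/// For example (illustrative, names hypothetical):
/// @code
/// struct V {};
/// struct A : virtual V {};
/// struct B : A {
///   B() : V() {} // 'V' is found as a virtual base of 'B'
/// };
/// @endcode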
static bool FindBaseInitializer(Sema &SemaRef,
CXXRecordDecl *ClassDecl,
QualType BaseType,
const CXXBaseSpecifier *&DirectBaseSpec,
const CXXBaseSpecifier *&VirtualBaseSpec) {
// First, check for a direct base class.
DirectBaseSpec = nullptr;
for (const auto &Base : ClassDecl->bases()) {
if (SemaRef.Context.hasSameUnqualifiedType(BaseType, Base.getType())) {
// We found a direct base of this type. That's what we're
// initializing.
DirectBaseSpec = &Base;
break;
}
}
// Check for a virtual base class.
// FIXME: We might be able to short-circuit this if we know in advance that
// there are no virtual bases.
VirtualBaseSpec = nullptr;
if (!DirectBaseSpec || !DirectBaseSpec->isVirtual()) {
// We haven't found a base yet; search the class hierarchy for a
// virtual base class.
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
if (SemaRef.IsDerivedFrom(ClassDecl->getLocation(),
SemaRef.Context.getTypeDeclType(ClassDecl),
BaseType, Paths)) {
for (CXXBasePaths::paths_iterator Path = Paths.begin();
Path != Paths.end(); ++Path) {
if (Path->back().Base->isVirtual()) {
VirtualBaseSpec = Path->back().Base;
break;
}
}
}
}
return DirectBaseSpec || VirtualBaseSpec;
}
/// \brief Handle a C++ member initializer using braced-init-list syntax.
MemInitResult
Sema::ActOnMemInitializer(Decl *ConstructorD,
Scope *S,
CXXScopeSpec &SS,
IdentifierInfo *MemberOrBase,
ParsedType TemplateTypeTy,
const DeclSpec &DS,
SourceLocation IdLoc,
Expr *InitList,
SourceLocation EllipsisLoc) {
return BuildMemInitializer(ConstructorD, S, SS, MemberOrBase, TemplateTypeTy,
DS, IdLoc, InitList,
EllipsisLoc);
}
/// \brief Handle a C++ member initializer using parentheses syntax.
MemInitResult
Sema::ActOnMemInitializer(Decl *ConstructorD,
Scope *S,
CXXScopeSpec &SS,
IdentifierInfo *MemberOrBase,
ParsedType TemplateTypeTy,
const DeclSpec &DS,
SourceLocation IdLoc,
SourceLocation LParenLoc,
ArrayRef<Expr *> Args,
SourceLocation RParenLoc,
SourceLocation EllipsisLoc) {
Expr *List = new (Context) ParenListExpr(Context, LParenLoc,
Args, RParenLoc);
return BuildMemInitializer(ConstructorD, S, SS, MemberOrBase, TemplateTypeTy,
DS, IdLoc, List, EllipsisLoc);
}
namespace {
// Callback to only accept typo corrections that can be a valid C++ member
// initializer: either a non-static data member or a base class.
class MemInitializerValidatorCCC : public CorrectionCandidateCallback {
public:
explicit MemInitializerValidatorCCC(CXXRecordDecl *ClassDecl)
: ClassDecl(ClassDecl) {}
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (NamedDecl *ND = candidate.getCorrectionDecl()) {
if (FieldDecl *Member = dyn_cast<FieldDecl>(ND))
return Member->getDeclContext()->getRedeclContext()->Equals(ClassDecl);
return isa<TypeDecl>(ND);
}
return false;
}
private:
CXXRecordDecl *ClassDecl;
};
}
/// \brief Handle a C++ member initializer.
MemInitResult
Sema::BuildMemInitializer(Decl *ConstructorD,
Scope *S,
CXXScopeSpec &SS,
IdentifierInfo *MemberOrBase,
ParsedType TemplateTypeTy,
const DeclSpec &DS,
SourceLocation IdLoc,
Expr *Init,
SourceLocation EllipsisLoc) {
ExprResult Res = CorrectDelayedTyposInExpr(Init);
if (!Res.isUsable())
return true;
Init = Res.get();
if (!ConstructorD)
return true;
AdjustDeclIfTemplate(ConstructorD);
CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(ConstructorD);
if (!Constructor) {
// The user wrote a constructor initializer on a function that is
// not a C++ constructor. Ignore the error for now, because we may
// have more member initializers coming; we'll diagnose it just
// once in ActOnMemInitializers.
return true;
}
CXXRecordDecl *ClassDecl = Constructor->getParent();
// C++ [class.base.init]p2:
// Names in a mem-initializer-id are looked up in the scope of the
// constructor's class and, if not found in that scope, are looked
// up in the scope containing the constructor's definition.
// [Note: if the constructor's class contains a member with the
// same name as a direct or virtual base class of the class, a
// mem-initializer-id naming the member or base class and composed
// of a single identifier refers to the class member. A
// mem-initializer-id for the hidden base class may be specified
// using a qualified name. ]
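// For example (illustrative, names hypothetical):
//   struct B {};
//   struct D : B {
//     int B;
//     D() : B(0) {}     // initializes the member 'B', not the base class
//     D(int) : ::B() {} // a qualified name refers to the hidden base class
//   };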
if (!SS.getScopeRep() && !TemplateTypeTy) {
// Look for a member, first.
DeclContext::lookup_result Result = ClassDecl->lookup(MemberOrBase);
if (!Result.empty()) {
ValueDecl *Member;
if ((Member = dyn_cast<FieldDecl>(Result.front())) ||
(Member = dyn_cast<IndirectFieldDecl>(Result.front()))) {
if (EllipsisLoc.isValid())
Diag(EllipsisLoc, diag::err_pack_expansion_member_init)
<< MemberOrBase
<< SourceRange(IdLoc, Init->getSourceRange().getEnd());
return BuildMemberInitializer(Member, Init, IdLoc);
}
}
}
// It didn't name a member, so see if it names a class.
QualType BaseType;
TypeSourceInfo *TInfo = nullptr;
if (TemplateTypeTy) {
BaseType = GetTypeFromParser(TemplateTypeTy, &TInfo);
} else if (DS.getTypeSpecType() == TST_decltype) {
BaseType = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
} else if (DS.getTypeSpecType() == TST_decltype_auto) {
Diag(DS.getTypeSpecTypeLoc(), diag::err_decltype_auto_invalid);
return true;
} else {
LookupResult R(*this, MemberOrBase, IdLoc, LookupOrdinaryName);
LookupParsedName(R, S, &SS);
TypeDecl *TyD = R.getAsSingle<TypeDecl>();
if (!TyD) {
if (R.isAmbiguous()) return true;
// We don't want access-control diagnostics here.
R.suppressDiagnostics();
if (SS.isSet() && isDependentScopeSpecifier(SS)) {
bool NotUnknownSpecialization = false;
DeclContext *DC = computeDeclContext(SS, false);
if (CXXRecordDecl *Record = dyn_cast_or_null<CXXRecordDecl>(DC))
NotUnknownSpecialization = !Record->hasAnyDependentBases();
if (!NotUnknownSpecialization) {
// When the scope specifier can refer to a member of an unknown
// specialization, we take it as a type name.
BaseType = CheckTypenameType(ETK_None, SourceLocation(),
SS.getWithLocInContext(Context),
*MemberOrBase, IdLoc);
if (BaseType.isNull())
return true;
TInfo = Context.CreateTypeSourceInfo(BaseType);
DependentNameTypeLoc TL =
TInfo->getTypeLoc().castAs<DependentNameTypeLoc>();
if (!TL.isNull()) {
TL.setNameLoc(IdLoc);
TL.setElaboratedKeywordLoc(SourceLocation());
TL.setQualifierLoc(SS.getWithLocInContext(Context));
}
R.clear();
R.setLookupName(MemberOrBase);
}
}
// If no results were found, try to correct typos.
TypoCorrection Corr;
if (R.empty() && BaseType.isNull() &&
(Corr = CorrectTypo(
R.getLookupNameInfo(), R.getLookupKind(), S, &SS,
llvm::make_unique<MemInitializerValidatorCCC>(ClassDecl),
CTK_ErrorRecovery, ClassDecl))) {
if (FieldDecl *Member = Corr.getCorrectionDeclAs<FieldDecl>()) {
// We have found a non-static data member with a similar
// name to what was typed; complain and initialize that
// member.
diagnoseTypo(Corr,
PDiag(diag::err_mem_init_not_member_or_class_suggest)
<< MemberOrBase << true);
return BuildMemberInitializer(Member, Init, IdLoc);
} else if (TypeDecl *Type = Corr.getCorrectionDeclAs<TypeDecl>()) {
const CXXBaseSpecifier *DirectBaseSpec;
const CXXBaseSpecifier *VirtualBaseSpec;
if (FindBaseInitializer(*this, ClassDecl,
Context.getTypeDeclType(Type),
DirectBaseSpec, VirtualBaseSpec)) {
// We have found a direct or virtual base class with a
// similar name to what was typed; complain and initialize
// that base class.
diagnoseTypo(Corr,
PDiag(diag::err_mem_init_not_member_or_class_suggest)
<< MemberOrBase << false,
PDiag() /*Suppress note, we provide our own.*/);
const CXXBaseSpecifier *BaseSpec = DirectBaseSpec ? DirectBaseSpec
: VirtualBaseSpec;
Diag(BaseSpec->getLocStart(),
diag::note_base_class_specified_here)
<< BaseSpec->getType()
<< BaseSpec->getSourceRange();
TyD = Type;
}
}
}
if (!TyD && BaseType.isNull()) {
Diag(IdLoc, diag::err_mem_init_not_member_or_class)
<< MemberOrBase << SourceRange(IdLoc,Init->getSourceRange().getEnd());
return true;
}
}
if (BaseType.isNull()) {
BaseType = Context.getTypeDeclType(TyD);
MarkAnyDeclReferenced(TyD->getLocation(), TyD, /*OdrUse=*/false);
if (SS.isSet()) {
BaseType = Context.getElaboratedType(ETK_None, SS.getScopeRep(),
BaseType);
TInfo = Context.CreateTypeSourceInfo(BaseType);
ElaboratedTypeLoc TL = TInfo->getTypeLoc().castAs<ElaboratedTypeLoc>();
TL.getNamedTypeLoc().castAs<TypeSpecTypeLoc>().setNameLoc(IdLoc);
TL.setElaboratedKeywordLoc(SourceLocation());
TL.setQualifierLoc(SS.getWithLocInContext(Context));
}
}
}
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(BaseType, IdLoc);
return BuildBaseInitializer(BaseType, TInfo, Init, ClassDecl, EllipsisLoc);
}
/// Checks a member initializer expression for cases where reference (or
/// pointer) members are bound to by-value parameters (or their addresses).
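/// For example (illustrative, names hypothetical):
/// @code
/// struct S {
///   const int &r;
///   S(int v) : r(v) {} // warning: binding reference member 'r' to a
///                      // stack-allocated parameter 'v'
/// };
/// @endcode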
static void CheckForDanglingReferenceOrPointer(Sema &S, ValueDecl *Member,
Expr *Init,
SourceLocation IdLoc) {
QualType MemberTy = Member->getType();
// We only handle pointers and references currently.
// FIXME: Would this be relevant for ObjC object pointers? Or block pointers?
if (!MemberTy->isReferenceType() && !MemberTy->isPointerType())
return;
const bool IsPointer = MemberTy->isPointerType();
if (IsPointer) {
if (const UnaryOperator *Op
= dyn_cast<UnaryOperator>(Init->IgnoreParenImpCasts())) {
// The only case we're worried about with pointers requires taking the
// address.
if (Op->getOpcode() != UO_AddrOf)
return;
Init = Op->getSubExpr();
} else {
// We only handle address-of expression initializers for pointers.
return;
}
}
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Init->IgnoreParens())) {
// We only warn when referring to a non-reference parameter declaration.
const ParmVarDecl *Parameter = dyn_cast<ParmVarDecl>(DRE->getDecl());
if (!Parameter || Parameter->getType()->isReferenceType())
return;
S.Diag(Init->getExprLoc(),
IsPointer ? diag::warn_init_ptr_member_to_parameter_addr
: diag::warn_bind_ref_member_to_parameter)
<< Member << Parameter << Init->getSourceRange();
} else {
// Other initializers are fine.
return;
}
S.Diag(Member->getLocation(), diag::note_ref_or_ptr_member_declared_here)
<< (unsigned)IsPointer;
}
MemInitResult
Sema::BuildMemberInitializer(ValueDecl *Member, Expr *Init,
SourceLocation IdLoc) {
FieldDecl *DirectMember = dyn_cast<FieldDecl>(Member);
IndirectFieldDecl *IndirectMember = dyn_cast<IndirectFieldDecl>(Member);
assert((DirectMember || IndirectMember) &&
"Member must be a FieldDecl or IndirectFieldDecl");
if (DiagnoseUnexpandedParameterPack(Init, UPPC_Initializer))
return true;
if (Member->isInvalidDecl())
return true;
MultiExprArg Args;
if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
Args = MultiExprArg(ParenList->getExprs(), ParenList->getNumExprs());
} else if (InitListExpr *InitList = dyn_cast<InitListExpr>(Init)) {
Args = MultiExprArg(InitList->getInits(), InitList->getNumInits());
} else {
// Template instantiation doesn't reconstruct ParenListExprs for us.
Args = Init;
}
SourceRange InitRange = Init->getSourceRange();
if (Member->getType()->isDependentType() || Init->isTypeDependent()) {
// Can't check initialization for a member of dependent type or when
// any of the arguments are type-dependent expressions.
DiscardCleanupsInEvaluationContext();
} else {
bool InitList = false;
if (isa<InitListExpr>(Init)) {
InitList = true;
Args = Init;
}
// Initialize the member.
InitializedEntity MemberEntity =
DirectMember ? InitializedEntity::InitializeMember(DirectMember, nullptr)
: InitializedEntity::InitializeMember(IndirectMember,
nullptr);
InitializationKind Kind =
InitList ? InitializationKind::CreateDirectList(IdLoc)
: InitializationKind::CreateDirect(IdLoc, InitRange.getBegin(),
InitRange.getEnd());
InitializationSequence InitSeq(*this, MemberEntity, Kind, Args);
ExprResult MemberInit = InitSeq.Perform(*this, MemberEntity, Kind, Args,
nullptr);
if (MemberInit.isInvalid())
return true;
CheckForDanglingReferenceOrPointer(*this, Member, MemberInit.get(), IdLoc);
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
MemberInit = ActOnFinishFullExpr(MemberInit.get(), InitRange.getBegin());
if (MemberInit.isInvalid())
return true;
Init = MemberInit.get();
}
if (DirectMember) {
return new (Context) CXXCtorInitializer(Context, DirectMember, IdLoc,
InitRange.getBegin(), Init,
InitRange.getEnd());
} else {
return new (Context) CXXCtorInitializer(Context, IndirectMember, IdLoc,
InitRange.getBegin(), Init,
InitRange.getEnd());
}
}
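// A delegating constructor, the case BuildDelegatingInitializer handles
// (illustrative, names hypothetical):
//   struct S {
//     S(int);
//     S() : S(42) {} // delegates to S(int); C++11 and later only
//   };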
MemInitResult
Sema::BuildDelegatingInitializer(TypeSourceInfo *TInfo, Expr *Init,
CXXRecordDecl *ClassDecl) {
SourceLocation NameLoc = TInfo->getTypeLoc().getLocalSourceRange().getBegin();
if (!LangOpts.CPlusPlus11)
return Diag(NameLoc, diag::err_delegating_ctor)
<< TInfo->getTypeLoc().getLocalSourceRange();
Diag(NameLoc, diag::warn_cxx98_compat_delegating_ctor);
bool InitList = true;
MultiExprArg Args = Init;
if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
InitList = false;
Args = MultiExprArg(ParenList->getExprs(), ParenList->getNumExprs());
}
SourceRange InitRange = Init->getSourceRange();
// Initialize the object.
InitializedEntity DelegationEntity = InitializedEntity::InitializeDelegation(
QualType(ClassDecl->getTypeForDecl(), 0));
InitializationKind Kind =
InitList ? InitializationKind::CreateDirectList(NameLoc)
: InitializationKind::CreateDirect(NameLoc, InitRange.getBegin(),
InitRange.getEnd());
InitializationSequence InitSeq(*this, DelegationEntity, Kind, Args);
ExprResult DelegationInit = InitSeq.Perform(*this, DelegationEntity, Kind,
Args, nullptr);
if (DelegationInit.isInvalid())
return true;
assert(cast<CXXConstructExpr>(DelegationInit.get())->getConstructor() &&
"Delegating constructor with no target?");
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
DelegationInit = ActOnFinishFullExpr(DelegationInit.get(),
InitRange.getBegin());
if (DelegationInit.isInvalid())
return true;
// If we are in a dependent context, template instantiation will
// perform this type-checking again. Just save the arguments that we
// received in a ParenListExpr.
// FIXME: This isn't quite ideal, since our ASTs don't capture all
// of the information that we have about the base
// initializer. However, deconstructing the ASTs is a dicey process,
// and this approach is far more likely to get the corner cases right.
if (CurContext->isDependentContext())
DelegationInit = Init;
return new (Context) CXXCtorInitializer(Context, TInfo, InitRange.getBegin(),
DelegationInit.getAs<Expr>(),
InitRange.getEnd());
}
MemInitResult
Sema::BuildBaseInitializer(QualType BaseType, TypeSourceInfo *BaseTInfo,
Expr *Init, CXXRecordDecl *ClassDecl,
SourceLocation EllipsisLoc) {
SourceLocation BaseLoc
= BaseTInfo->getTypeLoc().getLocalSourceRange().getBegin();
if (!BaseType->isDependentType() && !BaseType->isRecordType())
return Diag(BaseLoc, diag::err_base_init_does_not_name_class)
<< BaseType << BaseTInfo->getTypeLoc().getLocalSourceRange();
// C++ [class.base.init]p2:
// [...] Unless the mem-initializer-id names a nonstatic data
// member of the constructor's class or a direct or virtual base
// of that class, the mem-initializer is ill-formed. A
// mem-initializer-list can initialize a base class using any
// name that denotes that base class type.
bool Dependent = BaseType->isDependentType() || Init->isTypeDependent();
SourceRange InitRange = Init->getSourceRange();
if (EllipsisLoc.isValid()) {
// This is a pack expansion.
if (!BaseType->containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< SourceRange(BaseLoc, InitRange.getEnd());
EllipsisLoc = SourceLocation();
}
} else {
// Check for any unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(BaseLoc, BaseTInfo, UPPC_Initializer))
return true;
if (DiagnoseUnexpandedParameterPack(Init, UPPC_Initializer))
return true;
}
// Check for direct and virtual base classes.
const CXXBaseSpecifier *DirectBaseSpec = nullptr;
const CXXBaseSpecifier *VirtualBaseSpec = nullptr;
if (!Dependent) {
if (Context.hasSameUnqualifiedType(QualType(ClassDecl->getTypeForDecl(),0),
BaseType))
return BuildDelegatingInitializer(BaseTInfo, Init, ClassDecl);
FindBaseInitializer(*this, ClassDecl, BaseType, DirectBaseSpec,
VirtualBaseSpec);
// C++ [base.class.init]p2:
// Unless the mem-initializer-id names a nonstatic data member of the
// constructor's class or a direct or virtual base of that class, the
// mem-initializer is ill-formed.
if (!DirectBaseSpec && !VirtualBaseSpec) {
// If the class has any dependent bases, then it's possible that
// one of those types will resolve to the same type as
// BaseType. Therefore, just treat this as a dependent base
// class initialization. FIXME: Should we try to check the
// initialization anyway? It seems odd.
if (ClassDecl->hasAnyDependentBases())
Dependent = true;
else
return Diag(BaseLoc, diag::err_not_direct_base_or_virtual)
<< BaseType << Context.getTypeDeclType(ClassDecl)
<< BaseTInfo->getTypeLoc().getLocalSourceRange();
}
}
if (Dependent) {
DiscardCleanupsInEvaluationContext();
return new (Context) CXXCtorInitializer(Context, BaseTInfo,
/*IsVirtual=*/false,
InitRange.getBegin(), Init,
InitRange.getEnd(), EllipsisLoc);
}
// C++ [base.class.init]p2:
// If a mem-initializer-id is ambiguous because it designates both
// a direct non-virtual base class and an inherited virtual base
// class, the mem-initializer is ill-formed.
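// For example (illustrative, names hypothetical):
//   struct V {};
//   struct A : virtual V {};
//   struct B : A, V {
//     B() : V() {} // error: 'V' is both a direct base and an inherited
//                  // virtual base
//   };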
if (DirectBaseSpec && VirtualBaseSpec)
return Diag(BaseLoc, diag::err_base_init_direct_and_virtual)
<< BaseType << BaseTInfo->getTypeLoc().getLocalSourceRange();
const CXXBaseSpecifier *BaseSpec = DirectBaseSpec;
if (!BaseSpec)
BaseSpec = VirtualBaseSpec;
// Initialize the base.
bool InitList = true;
MultiExprArg Args = Init;
if (ParenListExpr *ParenList = dyn_cast<ParenListExpr>(Init)) {
InitList = false;
Args = MultiExprArg(ParenList->getExprs(), ParenList->getNumExprs());
}
InitializedEntity BaseEntity =
InitializedEntity::InitializeBase(Context, BaseSpec, VirtualBaseSpec);
InitializationKind Kind =
InitList ? InitializationKind::CreateDirectList(BaseLoc)
: InitializationKind::CreateDirect(BaseLoc, InitRange.getBegin(),
InitRange.getEnd());
InitializationSequence InitSeq(*this, BaseEntity, Kind, Args);
ExprResult BaseInit = InitSeq.Perform(*this, BaseEntity, Kind, Args, nullptr);
if (BaseInit.isInvalid())
return true;
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
BaseInit = ActOnFinishFullExpr(BaseInit.get(), InitRange.getBegin());
if (BaseInit.isInvalid())
return true;
// If we are in a dependent context, template instantiation will
// perform this type-checking again. Just save the arguments that we
// received in a ParenListExpr.
// FIXME: This isn't quite ideal, since our ASTs don't capture all
// of the information that we have about the base
// initializer. However, deconstructing the ASTs is a dicey process,
// and this approach is far more likely to get the corner cases right.
if (CurContext->isDependentContext())
BaseInit = Init;
return new (Context) CXXCtorInitializer(Context, BaseTInfo,
BaseSpec->isVirtual(),
InitRange.getBegin(),
BaseInit.getAs<Expr>(),
InitRange.getEnd(), EllipsisLoc);
}
// Create a static_cast<T&&>(expr).
static Expr *CastForMoving(Sema &SemaRef, Expr *E, QualType T = QualType()) {
if (T.isNull()) T = E->getType();
QualType TargetType = SemaRef.BuildReferenceType(
T, /*SpelledAsLValue*/false, SourceLocation(), DeclarationName());
SourceLocation ExprLoc = E->getLocStart();
TypeSourceInfo *TargetLoc = SemaRef.Context.getTrivialTypeSourceInfo(
TargetType, ExprLoc);
return SemaRef.BuildCXXNamedCast(ExprLoc, tok::kw_static_cast, TargetLoc, E,
SourceRange(ExprLoc, ExprLoc),
E->getSourceRange()).get();
}
/// ImplicitInitializerKind - How an implicit base or member initializer should
/// initialize its base or member.
enum ImplicitInitializerKind {
IIK_Default,
IIK_Copy,
IIK_Move,
IIK_Inherit
};
static bool
BuildImplicitBaseInitializer(Sema &SemaRef, CXXConstructorDecl *Constructor,
ImplicitInitializerKind ImplicitInitKind,
CXXBaseSpecifier *BaseSpec,
bool IsInheritedVirtualBase,
CXXCtorInitializer *&CXXBaseInit) {
InitializedEntity InitEntity
= InitializedEntity::InitializeBase(SemaRef.Context, BaseSpec,
IsInheritedVirtualBase);
ExprResult BaseInit;
switch (ImplicitInitKind) {
case IIK_Inherit:
case IIK_Default: {
InitializationKind InitKind
= InitializationKind::CreateDefault(Constructor->getLocation());
InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, None);
BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, None);
break;
}
case IIK_Move:
case IIK_Copy: {
bool Moving = ImplicitInitKind == IIK_Move;
ParmVarDecl *Param = Constructor->getParamDecl(0);
QualType ParamType = Param->getType().getNonReferenceType();
Expr *CopyCtorArg =
DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
SourceLocation(), Param, false,
Constructor->getLocation(), ParamType,
VK_LValue, nullptr);
SemaRef.MarkDeclRefReferenced(cast<DeclRefExpr>(CopyCtorArg));
// Cast to the base class to avoid ambiguities.
QualType ArgTy =
SemaRef.Context.getQualifiedType(BaseSpec->getType().getUnqualifiedType(),
ParamType.getQualifiers());
if (Moving) {
CopyCtorArg = CastForMoving(SemaRef, CopyCtorArg);
}
CXXCastPath BasePath;
BasePath.push_back(BaseSpec);
CopyCtorArg = SemaRef.ImpCastExprToType(CopyCtorArg, ArgTy,
CK_UncheckedDerivedToBase,
Moving ? VK_XValue : VK_LValue,
&BasePath).get();
InitializationKind InitKind
= InitializationKind::CreateDirect(Constructor->getLocation(),
SourceLocation(), SourceLocation());
InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, CopyCtorArg);
BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, CopyCtorArg);
break;
}
}
BaseInit = SemaRef.MaybeCreateExprWithCleanups(BaseInit);
if (BaseInit.isInvalid())
return true;
CXXBaseInit =
new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context,
SemaRef.Context.getTrivialTypeSourceInfo(BaseSpec->getType(),
SourceLocation()),
BaseSpec->isVirtual(),
SourceLocation(),
BaseInit.getAs<Expr>(),
SourceLocation(),
SourceLocation());
return false;
}
static bool RefersToRValueRef(Expr *MemRef) {
ValueDecl *Referenced = cast<MemberExpr>(MemRef)->getMemberDecl();
return Referenced->getType()->isRValueReferenceType();
}
static bool
BuildImplicitMemberInitializer(Sema &SemaRef, CXXConstructorDecl *Constructor,
ImplicitInitializerKind ImplicitInitKind,
FieldDecl *Field, IndirectFieldDecl *Indirect,
CXXCtorInitializer *&CXXMemberInit) {
if (Field->isInvalidDecl())
return true;
SourceLocation Loc = Constructor->getLocation();
if (ImplicitInitKind == IIK_Copy || ImplicitInitKind == IIK_Move) {
bool Moving = ImplicitInitKind == IIK_Move;
ParmVarDecl *Param = Constructor->getParamDecl(0);
QualType ParamType = Param->getType().getNonReferenceType();
// Suppress copying zero-width bitfields.
if (Field->isBitField() && Field->getBitWidthValue(SemaRef.Context) == 0)
return false;
Expr *MemberExprBase =
DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
SourceLocation(), Param, false,
Loc, ParamType, VK_LValue, nullptr);
SemaRef.MarkDeclRefReferenced(cast<DeclRefExpr>(MemberExprBase));
if (Moving) {
MemberExprBase = CastForMoving(SemaRef, MemberExprBase);
}
// Build a reference to this field within the parameter.
CXXScopeSpec SS;
LookupResult MemberLookup(SemaRef, Field->getDeclName(), Loc,
Sema::LookupMemberName);
MemberLookup.addDecl(Indirect ? cast<ValueDecl>(Indirect)
: cast<ValueDecl>(Field), AS_public);
MemberLookup.resolveKind();
ExprResult CtorArg
= SemaRef.BuildMemberReferenceExpr(MemberExprBase,
ParamType, Loc,
/*IsArrow=*/false,
SS,
/*TemplateKWLoc=*/SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
MemberLookup,
/*TemplateArgs=*/nullptr,
/*S*/nullptr);
if (CtorArg.isInvalid())
return true;
// C++11 [class.copy]p15:
// - if a member m has rvalue reference type T&&, it is direct-initialized
// with static_cast<T&&>(x.m);
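// For example (illustrative): for 'struct S { int &&m; };' the implicit move
// constructor initializes 'm' with 'static_cast<int&&>(x.m)'.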
if (RefersToRValueRef(CtorArg.get())) {
CtorArg = CastForMoving(SemaRef, CtorArg.get());
}
InitializedEntity Entity =
Indirect ? InitializedEntity::InitializeMember(Indirect, nullptr,
/*Implicit*/ true)
: InitializedEntity::InitializeMember(Field, nullptr,
/*Implicit*/ true);
// Direct-initialize to use the copy constructor.
InitializationKind InitKind =
InitializationKind::CreateDirect(Loc, SourceLocation(), SourceLocation());
Expr *CtorArgE = CtorArg.getAs<Expr>();
InitializationSequence InitSeq(SemaRef, Entity, InitKind, CtorArgE);
ExprResult MemberInit =
InitSeq.Perform(SemaRef, Entity, InitKind, MultiExprArg(&CtorArgE, 1));
MemberInit = SemaRef.MaybeCreateExprWithCleanups(MemberInit);
if (MemberInit.isInvalid())
return true;
if (Indirect)
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(
SemaRef.Context, Indirect, Loc, Loc, MemberInit.getAs<Expr>(), Loc);
else
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(
SemaRef.Context, Field, Loc, Loc, MemberInit.getAs<Expr>(), Loc);
return false;
}
assert((ImplicitInitKind == IIK_Default || ImplicitInitKind == IIK_Inherit) &&
"Unhandled implicit init kind!");
QualType FieldBaseElementType =
SemaRef.Context.getBaseElementType(Field->getType());
if (FieldBaseElementType->isRecordType()) {
InitializedEntity InitEntity =
Indirect ? InitializedEntity::InitializeMember(Indirect, nullptr,
/*Implicit*/ true)
: InitializedEntity::InitializeMember(Field, nullptr,
/*Implicit*/ true);
InitializationKind InitKind =
InitializationKind::CreateDefault(Loc);
InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, None);
ExprResult MemberInit =
InitSeq.Perform(SemaRef, InitEntity, InitKind, None);
MemberInit = SemaRef.MaybeCreateExprWithCleanups(MemberInit);
if (MemberInit.isInvalid())
return true;
if (Indirect)
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context,
Indirect, Loc,
Loc,
MemberInit.get(),
Loc);
else
CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context,
Field, Loc, Loc,
MemberInit.get(),
Loc);
return false;
}
if (!Field->getParent()->isUnion()) {
if (FieldBaseElementType->isReferenceType()) {
SemaRef.Diag(Constructor->getLocation(),
diag::err_uninitialized_member_in_ctor)
<< (int)Constructor->isImplicit()
<< SemaRef.Context.getTagDeclType(Constructor->getParent())
<< 0 << Field->getDeclName();
SemaRef.Diag(Field->getLocation(), diag::note_declared_at);
return true;
}
if (FieldBaseElementType.isConstQualified()) {
SemaRef.Diag(Constructor->getLocation(),
diag::err_uninitialized_member_in_ctor)
<< (int)Constructor->isImplicit()
<< SemaRef.Context.getTagDeclType(Constructor->getParent())
<< 1 << Field->getDeclName();
SemaRef.Diag(Field->getLocation(), diag::note_declared_at);
return true;
}
}
if (FieldBaseElementType.hasNonTrivialObjCLifetime()) {
// ARC and Weak:
// Default-initialize Objective-C pointers to NULL.
CXXMemberInit
= new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context, Field,
Loc, Loc,
new (SemaRef.Context) ImplicitValueInitExpr(Field->getType()),
Loc);
return false;
}
// Nothing to initialize.
CXXMemberInit = nullptr;
return false;
}
namespace {
struct BaseAndFieldInfo {
Sema &S;
CXXConstructorDecl *Ctor;
bool AnyErrorsInInits;
ImplicitInitializerKind IIK;
llvm::DenseMap<const void *, CXXCtorInitializer*> AllBaseFields;
SmallVector<CXXCtorInitializer*, 8> AllToInit;
llvm::DenseMap<TagDecl*, FieldDecl*> ActiveUnionMember;
BaseAndFieldInfo(Sema &S, CXXConstructorDecl *Ctor, bool ErrorsInInits)
: S(S), Ctor(Ctor), AnyErrorsInInits(ErrorsInInits) {
bool Generated = Ctor->isImplicit() || Ctor->isDefaulted();
if (Ctor->getInheritedConstructor())
IIK = IIK_Inherit;
else if (Generated && Ctor->isCopyConstructor())
IIK = IIK_Copy;
else if (Generated && Ctor->isMoveConstructor())
IIK = IIK_Move;
else
IIK = IIK_Default;
}
bool isImplicitCopyOrMove() const {
switch (IIK) {
case IIK_Copy:
case IIK_Move:
return true;
case IIK_Default:
case IIK_Inherit:
return false;
}
llvm_unreachable("Invalid ImplicitInitializerKind!");
}
bool addFieldInitializer(CXXCtorInitializer *Init) {
AllToInit.push_back(Init);
// Check whether this initializer makes the field "used".
if (Init->getInit()->HasSideEffects(S.Context))
S.UnusedPrivateFields.remove(Init->getAnyMember());
return false;
}
bool isInactiveUnionMember(FieldDecl *Field) {
RecordDecl *Record = Field->getParent();
if (!Record->isUnion())
return false;
if (FieldDecl *Active =
ActiveUnionMember.lookup(Record->getCanonicalDecl()))
return Active != Field->getCanonicalDecl();
// In an implicit copy or move constructor, ignore any in-class initializer.
if (isImplicitCopyOrMove())
return true;
// If there's no explicit initialization, the field is active only if it
// has an in-class initializer...
if (Field->hasInClassInitializer())
return false;
// ... or it's an anonymous struct or union whose class has an in-class
// initializer.
if (!Field->isAnonymousStructOrUnion())
return true;
CXXRecordDecl *FieldRD = Field->getType()->getAsCXXRecordDecl();
return !FieldRD->hasInClassInitializer();
}
/// \brief Determine whether the given field is, or is within, a union member
/// that is inactive (because there was an initializer given for a different
/// member of the union, or because the union was not initialized at all).
bool isWithinInactiveUnionMember(FieldDecl *Field,
IndirectFieldDecl *Indirect) {
if (!Indirect)
return isInactiveUnionMember(Field);
for (auto *C : Indirect->chain()) {
FieldDecl *Field = dyn_cast<FieldDecl>(C);
if (Field && isInactiveUnionMember(Field))
return true;
}
return false;
}
};
}
/// \brief Determine whether the given type is an incomplete or zero-length
/// array type.
static bool isIncompleteOrZeroLengthArrayType(ASTContext &Context, QualType T) {
if (T->isIncompleteArrayType())
return true;
while (const ConstantArrayType *ArrayT = Context.getAsConstantArrayType(T)) {
if (!ArrayT->getSize())
return true;
T = ArrayT->getElementType();
}
return false;
}
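// For example (a sketch, not from the diff), both of these field types are
// skipped by the initialization and destruction logic below:
//
//   struct F { int n; int tail[]; }; // incomplete (flexible) array member
//   struct Z { int none[0]; };       // zero-length array (GNU extension)
//
// The loop also strips outer constant dimensions, so 'int m[4][0]' counts
// as zero-length too.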
static bool CollectFieldInitializer(Sema &SemaRef, BaseAndFieldInfo &Info,
FieldDecl *Field,
IndirectFieldDecl *Indirect = nullptr) {
if (Field->isInvalidDecl())
return false;
// Overwhelmingly common case: we have a direct initializer for this field.
if (CXXCtorInitializer *Init =
Info.AllBaseFields.lookup(Field->getCanonicalDecl()))
return Info.addFieldInitializer(Init);
// C++11 [class.base.init]p8:
// if the entity is a non-static data member that has a
// brace-or-equal-initializer and either
// -- the constructor's class is a union and no other variant member of that
// union is designated by a mem-initializer-id or
// -- the constructor's class is not a union, and, if the entity is a member
// of an anonymous union, no other member of that union is designated by
// a mem-initializer-id,
// the entity is initialized as specified in [dcl.init].
//
// We also apply the same rules to handle anonymous structs within anonymous
// unions.
if (Info.isWithinInactiveUnionMember(Field, Indirect))
return false;
if (Field->hasInClassInitializer() && !Info.isImplicitCopyOrMove()) {
ExprResult DIE =
SemaRef.BuildCXXDefaultInitExpr(Info.Ctor->getLocation(), Field);
if (DIE.isInvalid())
return true;
CXXCtorInitializer *Init;
if (Indirect)
Init = new (SemaRef.Context)
CXXCtorInitializer(SemaRef.Context, Indirect, SourceLocation(),
SourceLocation(), DIE.get(), SourceLocation());
else
Init = new (SemaRef.Context)
CXXCtorInitializer(SemaRef.Context, Field, SourceLocation(),
SourceLocation(), DIE.get(), SourceLocation());
return Info.addFieldInitializer(Init);
}
// Don't initialize incomplete or zero-length arrays.
if (isIncompleteOrZeroLengthArrayType(SemaRef.Context, Field->getType()))
return false;
// Don't try to build an implicit initializer if there were semantic
// errors in any of the initializers (and therefore we might be
// missing some that the user actually wrote).
if (Info.AnyErrorsInInits)
return false;
CXXCtorInitializer *Init = nullptr;
if (BuildImplicitMemberInitializer(Info.S, Info.Ctor, Info.IIK, Field,
Indirect, Init))
return true;
if (!Init)
return false;
return Info.addFieldInitializer(Init);
}
bool
Sema::SetDelegatingInitializer(CXXConstructorDecl *Constructor,
CXXCtorInitializer *Initializer) {
assert(Initializer->isDelegatingInitializer());
Constructor->setNumCtorInitializers(1);
CXXCtorInitializer **initializer =
new (Context) CXXCtorInitializer*[1];
memcpy(initializer, &Initializer, sizeof (CXXCtorInitializer*));
Constructor->setCtorInitializers(initializer);
if (CXXDestructorDecl *Dtor = LookupDestructor(Constructor->getParent())) {
MarkFunctionReferenced(Initializer->getSourceLocation(), Dtor);
DiagnoseUseOfDecl(Dtor, Initializer->getSourceLocation());
}
DelegatingCtorDecls.push_back(Constructor);
DiagnoseUninitializedFields(*this, Constructor);
return false;
}
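// A minimal example of the delegating case handled above (illustrative):
//
//   struct D {
//     D(int);
//     D() : D(42) {} // the single delegating initializer stored by
//                    // SetDelegatingInitializer; D's destructor is also
//                    // marked referenced, since the completed object must be
//                    // destroyed if the delegating constructor's body throws
//   };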
bool Sema::SetCtorInitializers(CXXConstructorDecl *Constructor, bool AnyErrors,
ArrayRef<CXXCtorInitializer *> Initializers) {
if (Constructor->isDependentContext()) {
// Just store the initializers as written, they will be checked during
// instantiation.
if (!Initializers.empty()) {
Constructor->setNumCtorInitializers(Initializers.size());
CXXCtorInitializer **baseOrMemberInitializers =
new (Context) CXXCtorInitializer*[Initializers.size()];
memcpy(baseOrMemberInitializers, Initializers.data(),
Initializers.size() * sizeof(CXXCtorInitializer*));
Constructor->setCtorInitializers(baseOrMemberInitializers);
}
// Let template instantiation know whether we had errors.
if (AnyErrors)
Constructor->setInvalidDecl();
return false;
}
BaseAndFieldInfo Info(*this, Constructor, AnyErrors);
// We need to build the initializer AST according to the order of construction
// and not the order the user specified in the Initializers list.
CXXRecordDecl *ClassDecl = Constructor->getParent()->getDefinition();
if (!ClassDecl)
return true;
bool HadError = false;
for (unsigned i = 0; i < Initializers.size(); i++) {
CXXCtorInitializer *Member = Initializers[i];
if (Member->isBaseInitializer())
Info.AllBaseFields[Member->getBaseClass()->getAs<RecordType>()] = Member;
else {
Info.AllBaseFields[Member->getAnyMember()->getCanonicalDecl()] = Member;
if (IndirectFieldDecl *F = Member->getIndirectMember()) {
for (auto *C : F->chain()) {
FieldDecl *FD = dyn_cast<FieldDecl>(C);
if (FD && FD->getParent()->isUnion())
Info.ActiveUnionMember.insert(std::make_pair(
FD->getParent()->getCanonicalDecl(), FD->getCanonicalDecl()));
}
} else if (FieldDecl *FD = Member->getMember()) {
if (FD->getParent()->isUnion())
Info.ActiveUnionMember.insert(std::make_pair(
FD->getParent()->getCanonicalDecl(), FD->getCanonicalDecl()));
}
}
}
// Keep track of the direct virtual bases.
llvm::SmallPtrSet<CXXBaseSpecifier *, 16> DirectVBases;
for (auto &I : ClassDecl->bases()) {
if (I.isVirtual())
DirectVBases.insert(&I);
}
// Push virtual bases before others.
for (auto &VBase : ClassDecl->vbases()) {
if (CXXCtorInitializer *Value
= Info.AllBaseFields.lookup(VBase.getType()->getAs<RecordType>())) {
// [class.base.init]p7, per DR257:
// A mem-initializer where the mem-initializer-id names a virtual base
// class is ignored during execution of a constructor of any class that
// is not the most derived class.
if (ClassDecl->isAbstract()) {
// FIXME: Provide a fixit to remove the base specifier. This requires
// tracking the location of the associated comma for a base specifier.
Diag(Value->getSourceLocation(), diag::warn_abstract_vbase_init_ignored)
<< VBase.getType() << ClassDecl;
DiagnoseAbstractType(ClassDecl);
}
Info.AllToInit.push_back(Value);
} else if (!AnyErrors && !ClassDecl->isAbstract()) {
// [class.base.init]p8, per DR257:
// If a given [...] base class is not named by a mem-initializer-id
// [...] and the entity is not a virtual base class of an abstract
// class, then [...] the entity is default-initialized.
bool IsInheritedVirtualBase = !DirectVBases.count(&VBase);
CXXCtorInitializer *CXXBaseInit;
if (BuildImplicitBaseInitializer(*this, Constructor, Info.IIK,
&VBase, IsInheritedVirtualBase,
CXXBaseInit)) {
HadError = true;
continue;
}
Info.AllToInit.push_back(CXXBaseInit);
}
}
// Non-virtual bases.
for (auto &Base : ClassDecl->bases()) {
// Virtuals are in the virtual base list and already constructed.
if (Base.isVirtual())
continue;
if (CXXCtorInitializer *Value
= Info.AllBaseFields.lookup(Base.getType()->getAs<RecordType>())) {
Info.AllToInit.push_back(Value);
} else if (!AnyErrors) {
CXXCtorInitializer *CXXBaseInit;
if (BuildImplicitBaseInitializer(*this, Constructor, Info.IIK,
&Base, /*IsInheritedVirtualBase=*/false,
CXXBaseInit)) {
HadError = true;
continue;
}
Info.AllToInit.push_back(CXXBaseInit);
}
}
// Fields.
for (auto *Mem : ClassDecl->decls()) {
if (auto *F = dyn_cast<FieldDecl>(Mem)) {
// C++ [class.bit]p2:
// A declaration for a bit-field that omits the identifier declares an
// unnamed bit-field. Unnamed bit-fields are not members and cannot be
// initialized.
if (F->isUnnamedBitfield())
continue;
// If we're not generating the implicit copy/move constructor, then we'll
// handle anonymous struct/union fields based on their individual
// indirect fields.
if (F->isAnonymousStructOrUnion() && !Info.isImplicitCopyOrMove())
continue;
if (CollectFieldInitializer(*this, Info, F))
HadError = true;
continue;
}
// Beyond this point, we only consider default initialization.
if (Info.isImplicitCopyOrMove())
continue;
if (auto *F = dyn_cast<IndirectFieldDecl>(Mem)) {
if (F->getType()->isIncompleteArrayType()) {
assert(ClassDecl->hasFlexibleArrayMember() &&
"Incomplete array type is not valid");
continue;
}
// Initialize each field of an anonymous struct individually.
if (CollectFieldInitializer(*this, Info, F->getAnonField(), F))
HadError = true;
continue;
}
}
unsigned NumInitializers = Info.AllToInit.size();
if (NumInitializers > 0) {
Constructor->setNumCtorInitializers(NumInitializers);
CXXCtorInitializer **baseOrMemberInitializers =
new (Context) CXXCtorInitializer*[NumInitializers];
memcpy(baseOrMemberInitializers, Info.AllToInit.data(),
NumInitializers * sizeof(CXXCtorInitializer*));
Constructor->setCtorInitializers(baseOrMemberInitializers);
// Constructors implicitly reference the base and member
// destructors.
MarkBaseAndMemberDestructorsReferenced(Constructor->getLocation(),
Constructor->getParent());
}
return HadError;
}
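// The resulting order is construction order, not source order (sketch):
//
//   struct A {}; struct B {};
//   struct C : B, virtual A {
//     int f;
//     C() : B(), f(0) {} // stored order: A (virtual base), then B, then f,
//   };                   // even though A never appears in the list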
static void PopulateKeysForFields(FieldDecl *Field, SmallVectorImpl<const void*> &IdealInits) {
if (const RecordType *RT = Field->getType()->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->isAnonymousStructOrUnion()) {
for (auto *Field : RD->fields())
PopulateKeysForFields(Field, IdealInits);
return;
}
}
IdealInits.push_back(Field->getCanonicalDecl());
}
static const void *GetKeyForBase(ASTContext &Context, QualType BaseType) {
return Context.getCanonicalType(BaseType).getTypePtr();
}
static const void *GetKeyForMember(ASTContext &Context,
CXXCtorInitializer *Member) {
if (!Member->isAnyMemberInitializer())
return GetKeyForBase(Context, QualType(Member->getBaseClass(), 0));
return Member->getAnyMember()->getCanonicalDecl();
}
static void DiagnoseBaseOrMemInitializerOrder(
Sema &SemaRef, const CXXConstructorDecl *Constructor,
ArrayRef<CXXCtorInitializer *> Inits) {
if (Constructor->getDeclContext()->isDependentContext())
return;
// Don't check initializers order unless the warning is enabled at the
// location of at least one initializer.
bool ShouldCheckOrder = false;
for (unsigned InitIndex = 0; InitIndex != Inits.size(); ++InitIndex) {
CXXCtorInitializer *Init = Inits[InitIndex];
if (!SemaRef.Diags.isIgnored(diag::warn_initializer_out_of_order,
Init->getSourceLocation())) {
ShouldCheckOrder = true;
break;
}
}
if (!ShouldCheckOrder)
return;
// Build the list of bases and members in the order that they'll
// actually be initialized. The explicit initializers should be in
// this same order but may be missing things.
SmallVector<const void*, 32> IdealInitKeys;
const CXXRecordDecl *ClassDecl = Constructor->getParent();
// 1. Virtual bases.
for (const auto &VBase : ClassDecl->vbases())
IdealInitKeys.push_back(GetKeyForBase(SemaRef.Context, VBase.getType()));
// 2. Non-virtual bases.
for (const auto &Base : ClassDecl->bases()) {
if (Base.isVirtual())
continue;
IdealInitKeys.push_back(GetKeyForBase(SemaRef.Context, Base.getType()));
}
// 3. Direct fields.
for (auto *Field : ClassDecl->fields()) {
if (Field->isUnnamedBitfield())
continue;
PopulateKeysForFields(Field, IdealInitKeys);
}
unsigned NumIdealInits = IdealInitKeys.size();
unsigned IdealIndex = 0;
CXXCtorInitializer *PrevInit = nullptr;
for (unsigned InitIndex = 0; InitIndex != Inits.size(); ++InitIndex) {
CXXCtorInitializer *Init = Inits[InitIndex];
const void *InitKey = GetKeyForMember(SemaRef.Context, Init);
// Scan forward to try to find this initializer in the idealized
// initializers list.
for (; IdealIndex != NumIdealInits; ++IdealIndex)
if (InitKey == IdealInitKeys[IdealIndex])
break;
// If we didn't find this initializer, it must be because we
// scanned past it on a previous iteration. That can only
// happen if we're out of order; emit a warning.
if (IdealIndex == NumIdealInits && PrevInit) {
Sema::SemaDiagnosticBuilder D =
SemaRef.Diag(PrevInit->getSourceLocation(),
diag::warn_initializer_out_of_order);
if (PrevInit->isAnyMemberInitializer())
D << 0 << PrevInit->getAnyMember()->getDeclName();
else
D << 1 << PrevInit->getTypeSourceInfo()->getType();
if (Init->isAnyMemberInitializer())
D << 0 << Init->getAnyMember()->getDeclName();
else
D << 1 << Init->getTypeSourceInfo()->getType();
// Move back to the initializer's location in the ideal list.
for (IdealIndex = 0; IdealIndex != NumIdealInits; ++IdealIndex)
if (InitKey == IdealInitKeys[IdealIndex])
break;
assert(IdealIndex < NumIdealInits &&
"initializer not found in initializer list");
}
PrevInit = Init;
}
}
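// For instance (illustrative), the out-of-order warning fires on:
//
//   struct W {
//     int x, y;
//     W() : y(0), x(y) {} // -Wreorder: field 'y' will be initialized after
//   };                    // field 'x', so 'x(y)' reads an uninitialized 'y'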
namespace {
bool CheckRedundantInit(Sema &S,
CXXCtorInitializer *Init,
CXXCtorInitializer *&PrevInit) {
if (!PrevInit) {
PrevInit = Init;
return false;
}
if (FieldDecl *Field = Init->getAnyMember())
S.Diag(Init->getSourceLocation(),
diag::err_multiple_mem_initialization)
<< Field->getDeclName()
<< Init->getSourceRange();
else {
const Type *BaseClass = Init->getBaseClass();
assert(BaseClass && "neither field nor base");
S.Diag(Init->getSourceLocation(),
diag::err_multiple_base_initialization)
<< QualType(BaseClass, 0)
<< Init->getSourceRange();
}
S.Diag(PrevInit->getSourceLocation(), diag::note_previous_initializer)
<< 0 << PrevInit->getSourceRange();
return true;
}
typedef std::pair<NamedDecl *, CXXCtorInitializer *> UnionEntry;
typedef llvm::DenseMap<RecordDecl*, UnionEntry> RedundantUnionMap;
bool CheckRedundantUnionInit(Sema &S,
CXXCtorInitializer *Init,
RedundantUnionMap &Unions) {
FieldDecl *Field = Init->getAnyMember();
RecordDecl *Parent = Field->getParent();
NamedDecl *Child = Field;
while (Parent->isAnonymousStructOrUnion() || Parent->isUnion()) {
if (Parent->isUnion()) {
UnionEntry &En = Unions[Parent];
if (En.first && En.first != Child) {
S.Diag(Init->getSourceLocation(),
diag::err_multiple_mem_union_initialization)
<< Field->getDeclName()
<< Init->getSourceRange();
S.Diag(En.second->getSourceLocation(), diag::note_previous_initializer)
<< 0 << En.second->getSourceRange();
return true;
}
if (!En.first) {
En.first = Child;
En.second = Init;
}
if (!Parent->isAnonymousStructOrUnion())
return false;
}
Child = Parent;
Parent = cast<RecordDecl>(Parent->getDeclContext());
}
return false;
}
}
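// Sketches of the redundancy errors checked above (illustrative only):
//
//   struct R {
//     int a;
//     R() : a(1), a(2) {} // err_multiple_mem_initialization
//   };
//   struct V {
//     union { int i; float f; };
//     V() : i(0), f(0) {} // err_multiple_mem_union_initialization: 'i' and
//   };                    // 'f' are members of the same anonymous union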
/// ActOnMemInitializers - Handle the member initializers for a constructor.
void Sema::ActOnMemInitializers(Decl *ConstructorDecl,
SourceLocation ColonLoc,
ArrayRef<CXXCtorInitializer*> MemInits,
bool AnyErrors) {
if (!ConstructorDecl)
return;
AdjustDeclIfTemplate(ConstructorDecl);
CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(ConstructorDecl);
if (!Constructor) {
Diag(ColonLoc, diag::err_only_constructors_take_base_inits);
return;
}
// Mapping for the duplicate initializers check.
// For member initializers, this is keyed with a FieldDecl*.
// For base initializers, this is keyed with a Type*.
llvm::DenseMap<const void *, CXXCtorInitializer *> Members;
// Mapping for the inconsistent anonymous-union initializers check.
RedundantUnionMap MemberUnions;
bool HadError = false;
for (unsigned i = 0; i < MemInits.size(); i++) {
CXXCtorInitializer *Init = MemInits[i];
// Set the source order index.
Init->setSourceOrder(i);
if (Init->isAnyMemberInitializer()) {
const void *Key = GetKeyForMember(Context, Init);
if (CheckRedundantInit(*this, Init, Members[Key]) ||
CheckRedundantUnionInit(*this, Init, MemberUnions))
HadError = true;
} else if (Init->isBaseInitializer()) {
const void *Key = GetKeyForMember(Context, Init);
if (CheckRedundantInit(*this, Init, Members[Key]))
HadError = true;
} else {
assert(Init->isDelegatingInitializer());
// This must be the only initializer
if (MemInits.size() != 1) {
Diag(Init->getSourceLocation(),
diag::err_delegating_initializer_alone)
<< Init->getSourceRange() << MemInits[i ? 0 : 1]->getSourceRange();
// We will treat this as being the only initializer.
}
SetDelegatingInitializer(Constructor, MemInits[i]);
// Return immediately as the initializer is set.
return;
}
}
if (HadError)
return;
DiagnoseBaseOrMemInitializerOrder(*this, Constructor, MemInits);
SetCtorInitializers(Constructor, AnyErrors, MemInits);
DiagnoseUninitializedFields(*this, Constructor);
}
void
Sema::MarkBaseAndMemberDestructorsReferenced(SourceLocation Location,
CXXRecordDecl *ClassDecl) {
// Ignore dependent contexts. Also ignore unions, since their members never
// have destructors implicitly called.
if (ClassDecl->isDependentContext() || ClassDecl->isUnion())
return;
// FIXME: all the access-control diagnostics are positioned on the
// field/base declaration. That's probably good; that said, the
// user might reasonably want to know why the destructor is being
// emitted, and we currently don't say.
// Non-static data members.
for (auto *Field : ClassDecl->fields()) {
if (Field->isInvalidDecl())
continue;
// Don't destroy incomplete or zero-length arrays.
if (isIncompleteOrZeroLengthArrayType(Context, Field->getType()))
continue;
QualType FieldType = Context.getBaseElementType(Field->getType());
const RecordType* RT = FieldType->getAs<RecordType>();
if (!RT)
continue;
CXXRecordDecl *FieldClassDecl = cast<CXXRecordDecl>(RT->getDecl());
if (FieldClassDecl->isInvalidDecl())
continue;
if (FieldClassDecl->hasIrrelevantDestructor())
continue;
// The destructor for an implicit anonymous union member is never invoked.
if (FieldClassDecl->isUnion() && FieldClassDecl->isAnonymousStructOrUnion())
continue;
CXXDestructorDecl *Dtor = LookupDestructor(FieldClassDecl);
assert(Dtor && "No dtor found for FieldClassDecl!");
CheckDestructorAccess(Field->getLocation(), Dtor,
PDiag(diag::err_access_dtor_field)
<< Field->getDeclName()
<< FieldType);
MarkFunctionReferenced(Location, Dtor);
DiagnoseUseOfDecl(Dtor, Location);
}
// We only potentially invoke the destructors of potentially constructed
// subobjects.
bool VisitVirtualBases = !ClassDecl->isAbstract();
llvm::SmallPtrSet<const RecordType *, 8> DirectVirtualBases;
// Bases.
for (const auto &Base : ClassDecl->bases()) {
// Bases are always records in a well-formed non-dependent class.
const RecordType *RT = Base.getType()->getAs<RecordType>();
// Remember direct virtual bases.
if (Base.isVirtual()) {
if (!VisitVirtualBases)
continue;
DirectVirtualBases.insert(RT);
}
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(RT->getDecl());
// If our base class is invalid, we probably can't get its dtor anyway.
if (BaseClassDecl->isInvalidDecl())
continue;
if (BaseClassDecl->hasIrrelevantDestructor())
continue;
CXXDestructorDecl *Dtor = LookupDestructor(BaseClassDecl);
assert(Dtor && "No dtor found for BaseClassDecl!");
// FIXME: caret should be on the start of the class name
CheckDestructorAccess(Base.getLocStart(), Dtor,
PDiag(diag::err_access_dtor_base)
<< Base.getType()
<< Base.getSourceRange(),
Context.getTypeDeclType(ClassDecl));
MarkFunctionReferenced(Location, Dtor);
DiagnoseUseOfDecl(Dtor, Location);
}
if (!VisitVirtualBases)
return;
// Virtual bases.
for (const auto &VBase : ClassDecl->vbases()) {
// Bases are always records in a well-formed non-dependent class.
const RecordType *RT = VBase.getType()->castAs<RecordType>();
// Ignore direct virtual bases.
if (DirectVirtualBases.count(RT))
continue;
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(RT->getDecl());
// If our base class is invalid, we probably can't get its dtor anyway.
if (BaseClassDecl->isInvalidDecl())
continue;
if (BaseClassDecl->hasIrrelevantDestructor())
continue;
CXXDestructorDecl *Dtor = LookupDestructor(BaseClassDecl);
assert(Dtor && "No dtor found for BaseClassDecl!");
if (CheckDestructorAccess(
ClassDecl->getLocation(), Dtor,
PDiag(diag::err_access_dtor_vbase)
<< Context.getTypeDeclType(ClassDecl) << VBase.getType(),
Context.getTypeDeclType(ClassDecl)) ==
AR_accessible) {
CheckDerivedToBaseConversion(
Context.getTypeDeclType(ClassDecl), VBase.getType(),
diag::err_access_dtor_vbase, 0, ClassDecl->getLocation(),
SourceRange(), DeclarationName(), nullptr);
}
MarkFunctionReferenced(Location, Dtor);
DiagnoseUseOfDecl(Dtor, Location);
}
}
void Sema::ActOnDefaultCtorInitializers(Decl *CDtorDecl) {
if (!CDtorDecl)
return;
if (CXXConstructorDecl *Constructor
= dyn_cast<CXXConstructorDecl>(CDtorDecl)) {
SetCtorInitializers(Constructor, /*AnyErrors=*/false);
DiagnoseUninitializedFields(*this, Constructor);
}
}
bool Sema::isAbstractType(SourceLocation Loc, QualType T) {
if (!getLangOpts().CPlusPlus)
return false;
const auto *RD = Context.getBaseElementType(T)->getAsCXXRecordDecl();
if (!RD)
return false;
// FIXME: Per [temp.inst]p1, we are supposed to trigger instantiation of a
// class template specialization here, but doing so breaks a lot of code.
// We can't answer whether something is abstract until it has a
// definition. If it's currently being defined, we'll walk back
// over all the declarations when we have a full definition.
const CXXRecordDecl *Def = RD->getDefinition();
if (!Def || Def->isBeingDefined())
return false;
return RD->isAbstract();
}
bool Sema::RequireNonAbstractType(SourceLocation Loc, QualType T,
TypeDiagnoser &Diagnoser) {
if (!isAbstractType(Loc, T))
return false;
T = Context.getBaseElementType(T);
Diagnoser.diagnose(*this, Loc, T);
DiagnoseAbstractType(T->getAsCXXRecordDecl());
return true;
}
void Sema::DiagnoseAbstractType(const CXXRecordDecl *RD) {
// Check if we've already emitted the list of pure virtual functions
// for this class.
if (PureVirtualClassDiagSet && PureVirtualClassDiagSet->count(RD))
return;
// If the diagnostic is suppressed, don't emit the notes. We're only
// going to emit them once, so try to attach them to a diagnostic we're
// actually going to show.
if (Diags.isLastDiagnosticIgnored())
return;
CXXFinalOverriderMap FinalOverriders;
RD->getFinalOverriders(FinalOverriders);
// Keep a set of seen pure methods so we won't diagnose the same method
// more than once.
llvm::SmallPtrSet<const CXXMethodDecl *, 8> SeenPureMethods;
for (CXXFinalOverriderMap::iterator M = FinalOverriders.begin(),
MEnd = FinalOverriders.end();
M != MEnd;
++M) {
for (OverridingMethods::iterator SO = M->second.begin(),
SOEnd = M->second.end();
SO != SOEnd; ++SO) {
// C++ [class.abstract]p4:
// A class is abstract if it contains or inherits at least one
// pure virtual function for which the final overrider is pure
// virtual.
//
if (SO->second.size() != 1)
continue;
if (!SO->second.front().Method->isPure())
continue;
if (!SeenPureMethods.insert(SO->second.front().Method).second)
continue;
Diag(SO->second.front().Method->getLocation(),
diag::note_pure_virtual_function)
<< SO->second.front().Method->getDeclName() << RD->getDeclName();
}
}
if (!PureVirtualClassDiagSet)
PureVirtualClassDiagSet.reset(new RecordDeclSetTy);
PureVirtualClassDiagSet->insert(RD);
}
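// For example (a sketch), the note emitted here points at each pure virtual
// function that keeps the class abstract:
//
//   struct Shape {
//     virtual void draw() = 0; // note: unimplemented pure virtual method
//   };                         // 'draw' in 'Shape'
//   Shape s; // error: variable type 'Shape' is an abstract class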
namespace {
struct AbstractUsageInfo {
Sema &S;
CXXRecordDecl *Record;
CanQualType AbstractType;
bool Invalid;
AbstractUsageInfo(Sema &S, CXXRecordDecl *Record)
: S(S), Record(Record),
AbstractType(S.Context.getCanonicalType(
S.Context.getTypeDeclType(Record))),
Invalid(false) {}
void DiagnoseAbstractType() {
if (Invalid) return;
S.DiagnoseAbstractType(Record);
Invalid = true;
}
void CheckType(const NamedDecl *D, TypeLoc TL, Sema::AbstractDiagSelID Sel);
};
struct CheckAbstractUsage {
AbstractUsageInfo &Info;
const NamedDecl *Ctx;
CheckAbstractUsage(AbstractUsageInfo &Info, const NamedDecl *Ctx)
: Info(Info), Ctx(Ctx) {}
void Visit(TypeLoc TL, Sema::AbstractDiagSelID Sel) {
switch (TL.getTypeLocClass()) {
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
case TypeLoc::CLASS: Check(TL.castAs<CLASS##TypeLoc>(), Sel); break;
#include "clang/AST/TypeLocNodes.def"
}
}
void Check(FunctionProtoTypeLoc TL, Sema::AbstractDiagSelID Sel) {
Visit(TL.getReturnLoc(), Sema::AbstractReturnType);
for (unsigned I = 0, E = TL.getNumParams(); I != E; ++I) {
if (!TL.getParam(I))
continue;
TypeSourceInfo *TSI = TL.getParam(I)->getTypeSourceInfo();
if (TSI) Visit(TSI->getTypeLoc(), Sema::AbstractParamType);
}
}
void Check(ArrayTypeLoc TL, Sema::AbstractDiagSelID Sel) {
Visit(TL.getElementLoc(), Sema::AbstractArrayType);
}
void Check(TemplateSpecializationTypeLoc TL, Sema::AbstractDiagSelID Sel) {
// Visit the type parameters from a permissive context.
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) {
TemplateArgumentLoc TAL = TL.getArgLoc(I);
if (TAL.getArgument().getKind() == TemplateArgument::Type)
if (TypeSourceInfo *TSI = TAL.getTypeSourceInfo())
Visit(TSI->getTypeLoc(), Sema::AbstractNone);
// TODO: other template argument types?
}
}
// Visit pointee types from a permissive context.
#define CheckPolymorphic(Type) \
void Check(Type TL, Sema::AbstractDiagSelID Sel) { \
Visit(TL.getNextTypeLoc(), Sema::AbstractNone); \
}
CheckPolymorphic(PointerTypeLoc)
CheckPolymorphic(ReferenceTypeLoc)
CheckPolymorphic(MemberPointerTypeLoc)
CheckPolymorphic(BlockPointerTypeLoc)
CheckPolymorphic(AtomicTypeLoc)
/// Handle all the types we haven't given a more specific
/// implementation for above.
void Check(TypeLoc TL, Sema::AbstractDiagSelID Sel) {
// Every other kind of type that we haven't called out already
// that has an inner type is either (1) sugar or (2) contains that
// inner type in some way as a subobject.
if (TypeLoc Next = TL.getNextTypeLoc())
return Visit(Next, Sel);
// If there's no inner type and we're in a permissive context,
// don't diagnose.
if (Sel == Sema::AbstractNone) return;
// Check whether the type matches the abstract type.
QualType T = TL.getType();
if (T->isArrayType()) {
Sel = Sema::AbstractArrayType;
T = Info.S.Context.getBaseElementType(T);
}
CanQualType CT = T->getCanonicalTypeUnqualified().getUnqualifiedType();
if (CT != Info.AbstractType) return;
// It matched; do some magic.
if (Sel == Sema::AbstractArrayType) {
Info.S.Diag(Ctx->getLocation(), diag::err_array_of_abstract_type)
<< T << TL.getSourceRange();
} else {
Info.S.Diag(Ctx->getLocation(), diag::err_abstract_type_in_decl)
<< Sel << T << TL.getSourceRange();
}
Info.DiagnoseAbstractType();
}
};
void AbstractUsageInfo::CheckType(const NamedDecl *D, TypeLoc TL,
Sema::AbstractDiagSelID Sel) {
CheckAbstractUsage(*this, D).Visit(TL, Sel);
}
}
/// Check for invalid uses of an abstract type in a method declaration.
static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
CXXMethodDecl *MD) {
// No need to do the check on definitions, which require that
// the return/param types be complete.
if (MD->doesThisDeclarationHaveABody())
return;
// For safety's sake, just ignore it if we don't have type source
// information. This should never happen for non-implicit methods,
// but...
if (TypeSourceInfo *TSI = MD->getTypeSourceInfo())
Info.CheckType(MD, TSI->getTypeLoc(), Sema::AbstractNone);
}
/// Check for invalid uses of an abstract type within a class definition.
static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
CXXRecordDecl *RD) {
for (auto *D : RD->decls()) {
if (D->isImplicit()) continue;
// Methods and method templates.
if (isa<CXXMethodDecl>(D)) {
CheckAbstractClassUsage(Info, cast<CXXMethodDecl>(D));
} else if (isa<FunctionTemplateDecl>(D)) {
FunctionDecl *FD = cast<FunctionTemplateDecl>(D)->getTemplatedDecl();
CheckAbstractClassUsage(Info, cast<CXXMethodDecl>(FD));
// Fields and static variables.
} else if (isa<FieldDecl>(D)) {
FieldDecl *FD = cast<FieldDecl>(D);
if (TypeSourceInfo *TSI = FD->getTypeSourceInfo())
Info.CheckType(FD, TSI->getTypeLoc(), Sema::AbstractFieldType);
} else if (isa<VarDecl>(D)) {
VarDecl *VD = cast<VarDecl>(D);
if (TypeSourceInfo *TSI = VD->getTypeSourceInfo())
Info.CheckType(VD, TSI->getTypeLoc(), Sema::AbstractVariableType);
// Nested classes and class templates.
} else if (isa<CXXRecordDecl>(D)) {
CheckAbstractClassUsage(Info, cast<CXXRecordDecl>(D));
} else if (isa<ClassTemplateDecl>(D)) {
CheckAbstractClassUsage(Info,
cast<ClassTemplateDecl>(D)->getTemplatedDecl());
}
}
}
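// Illustrative uses rejected by the checks above:
//
//   struct Abstract { virtual void f() = 0; };
//   struct User {
//     Abstract field;      // error: abstract class as a field type
//     Abstract arr[3];     // error: array of abstract class type
//     Abstract get();      // error: abstract class as a return type
//     void put(Abstract);  // error: abstract class as a parameter type
//   };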
static void ReferenceDllExportedMethods(Sema &S, CXXRecordDecl *Class) {
Attr *ClassAttr = getDLLAttr(Class);
if (!ClassAttr)
return;
assert(ClassAttr->getKind() == attr::DLLExport);
TemplateSpecializationKind TSK = Class->getTemplateSpecializationKind();
if (TSK == TSK_ExplicitInstantiationDeclaration)
// Don't go any further if this is just an explicit instantiation
// declaration.
return;
for (Decl *Member : Class->decls()) {
auto *MD = dyn_cast<CXXMethodDecl>(Member);
if (!MD)
continue;
if (Member->getAttr<DLLExportAttr>()) {
if (MD->isUserProvided()) {
// Instantiate non-default class member functions ...
// ... except for certain kinds of template specializations.
if (TSK == TSK_ImplicitInstantiation && !ClassAttr->isInherited())
continue;
S.MarkFunctionReferenced(Class->getLocation(), MD);
// The function will be passed to the consumer when its definition is
// encountered.
} else if (!MD->isTrivial() || MD->isExplicitlyDefaulted() ||
MD->isCopyAssignmentOperator() ||
MD->isMoveAssignmentOperator()) {
// Synthesize and instantiate non-trivial implicit methods, explicitly
// defaulted methods, and the copy and move assignment operators. The
// latter are exported even if they are trivial, because the address of
// an operator can be taken and should compare equal across libraries.
DiagnosticErrorTrap Trap(S.Diags);
S.MarkFunctionReferenced(Class->getLocation(), MD);
if (Trap.hasErrorOccurred()) {
S.Diag(ClassAttr->getLocation(), diag::note_due_to_dllexported_class)
<< Class->getName() << !S.getLangOpts().CPlusPlus11;
break;
}
// There is no later point when we will see the definition of this
// function, so pass it to the consumer now.
S.Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
}
}
}
}
static void checkForMultipleExportedDefaultConstructors(Sema &S,
CXXRecordDecl *Class) {
// Only the MS ABI has default constructor closures, so we don't need to do
// this semantic checking anywhere else.
if (!S.Context.getTargetInfo().getCXXABI().isMicrosoft())
return;
CXXConstructorDecl *LastExportedDefaultCtor = nullptr;
for (Decl *Member : Class->decls()) {
// Look for exported default constructors.
auto *CD = dyn_cast<CXXConstructorDecl>(Member);
if (!CD || !CD->isDefaultConstructor())
continue;
auto *Attr = CD->getAttr<DLLExportAttr>();
if (!Attr)
continue;
// If the class is non-dependent, mark the default arguments as ODR-used so
// that we can properly codegen the constructor closure.
if (!Class->isDependentContext()) {
for (ParmVarDecl *PD : CD->parameters()) {
(void)S.CheckCXXDefaultArgExpr(Attr->getLocation(), CD, PD);
S.DiscardCleanupsInEvaluationContext();
}
}
if (LastExportedDefaultCtor) {
S.Diag(LastExportedDefaultCtor->getLocation(),
diag::err_attribute_dll_ambiguous_default_ctor)
<< Class;
S.Diag(CD->getLocation(), diag::note_entity_declared_at)
<< CD->getDeclName();
return;
}
LastExportedDefaultCtor = CD;
}
}
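// A sketch of the MS ABI ambiguity diagnosed above: with default arguments,
// two exported constructors can both serve as the default constructor
// closure's target:
//
//   struct S {
//     __declspec(dllexport) S(int x = 0);
//     __declspec(dllexport) S(float y = 0); // both are callable with no
//   };                                      // arguments, so the default ctor
//                                           // closure target is ambiguous
//                            // (err_attribute_dll_ambiguous_default_ctor)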
/// \brief Check class-level dllimport/dllexport attribute.
void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) {
Attr *ClassAttr = getDLLAttr(Class);
// MSVC propagates DLL attributes to partial class template specializations.
if (Context.getTargetInfo().getCXXABI().isMicrosoft() && !ClassAttr) {
if (auto *Spec = dyn_cast<ClassTemplatePartialSpecializationDecl>(Class)) {
if (Attr *TemplateAttr =
getDLLAttr(Spec->getSpecializedTemplate()->getTemplatedDecl())) {
auto *A = cast<InheritableAttr>(TemplateAttr->clone(getASTContext()));
A->setInherited(true);
ClassAttr = A;
}
}
}
if (!ClassAttr)
return;
if (!Class->isExternallyVisible()) {
Diag(Class->getLocation(), diag::err_attribute_dll_not_extern)
<< Class << ClassAttr;
return;
}
if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!ClassAttr->isInherited()) {
// Diagnose dll attributes on members of class with dll attribute.
for (Decl *Member : Class->decls()) {
if (!isa<VarDecl>(Member) && !isa<CXXMethodDecl>(Member))
continue;
InheritableAttr *MemberAttr = getDLLAttr(Member);
if (!MemberAttr || MemberAttr->isInherited() || Member->isInvalidDecl())
continue;
Diag(MemberAttr->getLocation(),
diag::err_attribute_dll_member_of_dll_class)
<< MemberAttr << ClassAttr;
Diag(ClassAttr->getLocation(), diag::note_previous_attribute);
Member->setInvalidDecl();
}
}
if (Class->getDescribedClassTemplate())
// Don't inherit dll attribute until the template is instantiated.
return;
// The class is either imported or exported.
const bool ClassExported = ClassAttr->getKind() == attr::DLLExport;
TemplateSpecializationKind TSK = Class->getTemplateSpecializationKind();
// Ignore explicit dllexport on explicit class template instantiation declarations.
if (ClassExported && !ClassAttr->isInherited() &&
TSK == TSK_ExplicitInstantiationDeclaration) {
Class->dropAttr<DLLExportAttr>();
return;
}
// Force declaration of implicit members so they can inherit the attribute.
ForceDeclarationOfImplicitMembers(Class);
// FIXME: MSVC's docs say all bases must be exportable, but this doesn't
// seem to be true in practice?
for (Decl *Member : Class->decls()) {
VarDecl *VD = dyn_cast<VarDecl>(Member);
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Member);
// Only methods and static fields inherit the attributes.
if (!VD && !MD)
continue;
if (MD) {
// Don't process deleted methods.
if (MD->isDeleted())
continue;
if (MD->isInlined()) {
// MinGW does not import or export inline methods.
if (!Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!Context.getTargetInfo().getTriple().isWindowsItaniumEnvironment())
continue;
// MSVC versions before 2015 don't export the move assignment operators
// and move constructor, so don't attempt to import/export them if
// we have a definition.
auto *Ctor = dyn_cast<CXXConstructorDecl>(MD);
if ((MD->isMoveAssignmentOperator() ||
(Ctor && Ctor->isMoveConstructor())) &&
!getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
continue;
// MSVC 2015 doesn't export a trivial defaulted ctor or dtor, but the copy
// assignment operator is exported anyway.
if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
(Ctor || isa<CXXDestructorDecl>(MD)) && MD->isTrivial())
continue;
}
}
if (!cast<NamedDecl>(Member)->isExternallyVisible())
continue;
if (!getDLLAttr(Member)) {
auto *NewAttr =
cast<InheritableAttr>(ClassAttr->clone(getASTContext()));
NewAttr->setInherited(true);
Member->addAttr(NewAttr);
}
}
if (ClassExported)
DelayedDllExportClasses.push_back(Class);
}
/// \brief Perform propagation of DLL attributes from a derived class to a
/// templated base class for MS compatibility.
void Sema::propagateDLLAttrToBaseClassTemplate(
CXXRecordDecl *Class, Attr *ClassAttr,
ClassTemplateSpecializationDecl *BaseTemplateSpec, SourceLocation BaseLoc) {
if (getDLLAttr(
BaseTemplateSpec->getSpecializedTemplate()->getTemplatedDecl())) {
// If the base class template has a DLL attribute, don't try to change it.
return;
}
auto TSK = BaseTemplateSpec->getSpecializationKind();
if (!getDLLAttr(BaseTemplateSpec) &&
(TSK == TSK_Undeclared || TSK == TSK_ExplicitInstantiationDeclaration ||
TSK == TSK_ImplicitInstantiation)) {
// The template hasn't been instantiated yet (or it has, but only as an
// explicit instantiation declaration or implicit instantiation, which means
// we haven't codegenned any members yet), so propagate the attribute.
auto *NewAttr = cast<InheritableAttr>(ClassAttr->clone(getASTContext()));
NewAttr->setInherited(true);
BaseTemplateSpec->addAttr(NewAttr);
// If the template is already instantiated, checkDLLAttributeRedeclaration()
// needs to be run again so that it sees the new attribute. Otherwise this
// will get run whenever the template is instantiated.
if (TSK != TSK_Undeclared)
checkClassLevelDLLAttribute(BaseTemplateSpec);
return;
}
if (getDLLAttr(BaseTemplateSpec)) {
// The template has already been specialized or instantiated with an
// attribute, explicitly or through propagation. We should not try to change
// it.
return;
}
// The template was previously instantiated or explicitly specialized without
// a dll attribute. It's too late for us to add an attribute, so warn that
// this is unsupported.
Diag(BaseLoc, diag::warn_attribute_dll_instantiated_base_class)
<< BaseTemplateSpec->isExplicitSpecialization();
Diag(ClassAttr->getLocation(), diag::note_attribute);
if (BaseTemplateSpec->isExplicitSpecialization()) {
Diag(BaseTemplateSpec->getLocation(),
diag::note_template_class_explicit_specialization_was_here)
<< BaseTemplateSpec;
} else {
Diag(BaseTemplateSpec->getPointOfInstantiation(),
diag::note_template_class_instantiation_was_here)
<< BaseTemplateSpec;
}
}
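// Roughly (an illustrative sketch, MS-compatibility mode):
//
//   template <typename T> struct Base {};
//   struct __declspec(dllexport) Derived : Base<int> {};
//
// If Base<int> carries no dll attribute and no members have been codegenned
// yet, the dllexport attribute is propagated to it; if it was already
// explicitly specialized or explicitly instantiated, only a warning is
// emitted.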
static void DefineImplicitSpecialMember(Sema &S, CXXMethodDecl *MD,
SourceLocation DefaultLoc) {
switch (S.getSpecialMember(MD)) {
case Sema::CXXDefaultConstructor:
S.DefineImplicitDefaultConstructor(DefaultLoc,
cast<CXXConstructorDecl>(MD));
break;
case Sema::CXXCopyConstructor:
S.DefineImplicitCopyConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
break;
case Sema::CXXCopyAssignment:
S.DefineImplicitCopyAssignment(DefaultLoc, MD);
break;
case Sema::CXXDestructor:
S.DefineImplicitDestructor(DefaultLoc, cast<CXXDestructorDecl>(MD));
break;
case Sema::CXXMoveConstructor:
S.DefineImplicitMoveConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
break;
case Sema::CXXMoveAssignment:
S.DefineImplicitMoveAssignment(DefaultLoc, MD);
break;
case Sema::CXXInvalid:
llvm_unreachable("Invalid special member.");
}
}
+/// Determine whether a type is permitted to be passed or returned in
+/// registers, per C++ [class.temporary]p3.
+static bool computeCanPassInRegisters(Sema &S, CXXRecordDecl *D) {
+ if (D->isDependentType() || D->isInvalidDecl())
+ return false;
+
+ // Per C++ [class.temporary]p3, the relevant condition is:
+ // each copy constructor, move constructor, and destructor of X is
+ // either trivial or deleted, and X has at least one non-deleted copy
+ // or move constructor
+ bool HasNonDeletedCopyOrMove = false;
+
+ if (D->needsImplicitCopyConstructor() &&
+ !D->defaultedCopyConstructorIsDeleted()) {
+ if (!D->hasTrivialCopyConstructor())
+ return false;
+ HasNonDeletedCopyOrMove = true;
+ }
+
+ if (S.getLangOpts().CPlusPlus11 && D->needsImplicitMoveConstructor() &&
+ !D->defaultedMoveConstructorIsDeleted()) {
+ if (!D->hasTrivialMoveConstructor())
+ return false;
+ HasNonDeletedCopyOrMove = true;
+ }
+
+ if (D->needsImplicitDestructor() && !D->defaultedDestructorIsDeleted() &&
+ !D->hasTrivialDestructor())
+ return false;
+
+ for (const CXXMethodDecl *MD : D->methods()) {
+ if (MD->isDeleted())
+ continue;
+
+ auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+ if (CD && CD->isCopyOrMoveConstructor())
+ HasNonDeletedCopyOrMove = true;
+ else if (!isa<CXXDestructorDecl>(MD))
+ continue;
+
+ if (!MD->isTrivial())
+ return false;
+ }
+
+ return HasNonDeletedCopyOrMove;
+}
+
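// A rough sketch of how the new rule applies (illustrative only):
//
//   struct InRegs  { int x; };                // trivial copy/move/dtor: yes
//   struct ByRef   { ByRef(const ByRef &); }; // non-trivial copy ctor: no
//   struct NoCopy {
//     NoCopy(const NoCopy &) = delete;        // no non-deleted copy or move
//     NoCopy(NoCopy &&) = delete;             // constructor: no
//   };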
/// \brief Perform semantic checks on a class definition that has been
/// completing, introducing implicitly-declared members, checking for
/// abstract types, etc.
void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
if (!Record)
return;
if (Record->isAbstract() && !Record->isInvalidDecl()) {
AbstractUsageInfo Info(*this, Record);
CheckAbstractClassUsage(Info, Record);
}
// If this is not an aggregate type and has no user-declared constructor,
// complain about any non-static data members of reference or const scalar
// type, since they will never get initializers.
if (!Record->isInvalidDecl() && !Record->isDependentType() &&
!Record->isAggregate() && !Record->hasUserDeclaredConstructor() &&
!Record->isLambda()) {
bool Complained = false;
for (const auto *F : Record->fields()) {
if (F->hasInClassInitializer() || F->isUnnamedBitfield())
continue;
if (F->getType()->isReferenceType() ||
(F->getType().isConstQualified() && F->getType()->isScalarType())) {
if (!Complained) {
Diag(Record->getLocation(), diag::warn_no_constructor_for_refconst)
<< Record->getTagKind() << Record;
Complained = true;
}
Diag(F->getLocation(), diag::note_refconst_member_not_initialized)
<< F->getType()->isReferenceType()
<< F->getDeclName();
}
}
}
if (Record->getIdentifier()) {
// C++ [class.mem]p13:
// If T is the name of a class, then each of the following shall have a
// name different from T:
// - every member of every anonymous union that is a member of class T.
//
// C++ [class.mem]p14:
// In addition, if class T has a user-declared constructor (12.1), every
// non-static data member of class T shall have a name different from T.
DeclContext::lookup_result R = Record->lookup(Record->getDeclName());
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E;
++I) {
NamedDecl *D = *I;
if ((isa<FieldDecl>(D) && Record->hasUserDeclaredConstructor()) ||
isa<IndirectFieldDecl>(D)) {
Diag(D->getLocation(), diag::err_member_name_of_class)
<< D->getDeclName();
break;
}
}
}
// Warn if the class has virtual methods but non-virtual public destructor.
if (Record->isPolymorphic() && !Record->isDependentType()) {
CXXDestructorDecl *dtor = Record->getDestructor();
if ((!dtor || (!dtor->isVirtual() && dtor->getAccess() == AS_public)) &&
!Record->hasAttr<FinalAttr>())
Diag(dtor ? dtor->getLocation() : Record->getLocation(),
diag::warn_non_virtual_dtor) << Context.getRecordType(Record);
}
if (Record->isAbstract()) {
if (FinalAttr *FA = Record->getAttr<FinalAttr>()) {
Diag(Record->getLocation(), diag::warn_abstract_final_class)
<< FA->isSpelledAsSealed();
DiagnoseAbstractType(Record);
}
}
bool HasMethodWithOverrideControl = false,
HasOverridingMethodWithoutOverrideControl = false;
if (!Record->isDependentType()) {
for (auto *M : Record->methods()) {
// See if a method overloads virtual methods in a base
// class without overriding any.
if (!M->isStatic())
DiagnoseHiddenVirtualMethods(M);
if (M->hasAttr<OverrideAttr>())
HasMethodWithOverrideControl = true;
else if (M->size_overridden_methods() > 0)
HasOverridingMethodWithoutOverrideControl = true;
// Check whether the explicitly-defaulted special members are valid.
if (!M->isInvalidDecl() && M->isExplicitlyDefaulted())
CheckExplicitlyDefaultedSpecialMember(M);
// For an explicitly defaulted or deleted special member, we defer
// determining triviality until the class is complete. That time is now!
CXXSpecialMember CSM = getSpecialMember(M);
if (!M->isImplicit() && !M->isUserProvided()) {
if (CSM != CXXInvalid) {
M->setTrivial(SpecialMemberIsTrivial(M, CSM));
// Inform the class that we've finished declaring this member.
Record->finishedDefaultedOrDeletedMember(M);
}
}
if (!M->isInvalidDecl() && M->isExplicitlyDefaulted() &&
M->hasAttr<DLLExportAttr>()) {
if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
M->isTrivial() &&
(CSM == CXXDefaultConstructor || CSM == CXXCopyConstructor ||
CSM == CXXDestructor))
M->dropAttr<DLLExportAttr>();
if (M->hasAttr<DLLExportAttr>()) {
DefineImplicitSpecialMember(*this, M, M->getLocation());
ActOnFinishInlineFunctionDef(M);
}
}
}
}
if (HasMethodWithOverrideControl &&
HasOverridingMethodWithoutOverrideControl) {
// At least one method has the 'override' control declared.
// Diagnose all other overridden methods which do not have 'override' specified on them.
for (auto *M : Record->methods())
DiagnoseAbsenceOfOverrideControl(M);
}
// ms_struct is a request to use the same ABI rules as MSVC. Check
// whether this class uses any C++ features that are implemented
// completely differently in MSVC, and if so, emit a diagnostic.
// That diagnostic defaults to an error, but we allow projects to
// map it down to a warning (or ignore it). It's a fairly common
// practice among users of the ms_struct pragma to mass-annotate
// headers, sweeping up a bunch of types that the project doesn't
// really rely on MSVC-compatible layout for. We must therefore
// support "ms_struct except for C++ stuff" as a secondary ABI.
if (Record->isMsStruct(Context) &&
(Record->isPolymorphic() || Record->getNumBases())) {
Diag(Record->getLocation(), diag::warn_cxx_ms_struct);
}
checkClassLevelDLLAttribute(Record);
+
+ Record->setCanPassInRegisters(computeCanPassInRegisters(*this, Record));
}
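// For example (illustrative), the non-virtual-destructor warning above:
//
//   struct Poly {
//     virtual void f();
//     ~Poly(); // -Wnon-virtual-dtor: 'Poly' has virtual functions but a
//   };         // public non-virtual destructor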
/// Look up the special member function that would be called by a special
/// member function for a subobject of class type.
///
/// \param Class The class type of the subobject.
/// \param CSM The kind of special member function.
/// \param FieldQuals If the subobject is a field, its cv-qualifiers.
/// \param ConstRHS True if this is a copy operation with a const object
/// on its RHS, that is, if the argument to the outer special member
/// function is 'const' and this is not a field marked 'mutable'.
static Sema::SpecialMemberOverloadResult lookupCallFromSpecialMember(
Sema &S, CXXRecordDecl *Class, Sema::CXXSpecialMember CSM,
unsigned FieldQuals, bool ConstRHS) {
unsigned LHSQuals = 0;
if (CSM == Sema::CXXCopyAssignment || CSM == Sema::CXXMoveAssignment)
LHSQuals = FieldQuals;
unsigned RHSQuals = FieldQuals;
if (CSM == Sema::CXXDefaultConstructor || CSM == Sema::CXXDestructor)
RHSQuals = 0;
else if (ConstRHS)
RHSQuals |= Qualifiers::Const;
return S.LookupSpecialMember(Class, CSM,
RHSQuals & Qualifiers::Const,
RHSQuals & Qualifiers::Volatile,
false,
LHSQuals & Qualifiers::Const,
LHSQuals & Qualifiers::Volatile);
}
class Sema::InheritedConstructorInfo {
Sema &S;
SourceLocation UseLoc;
/// A mapping from the base classes through which the constructor was
/// inherited to the using shadow declaration in that base class (or a null
/// pointer if the constructor was declared in that base class).
llvm::DenseMap<CXXRecordDecl *, ConstructorUsingShadowDecl *>
InheritedFromBases;
public:
InheritedConstructorInfo(Sema &S, SourceLocation UseLoc,
ConstructorUsingShadowDecl *Shadow)
: S(S), UseLoc(UseLoc) {
bool DiagnosedMultipleConstructedBases = false;
CXXRecordDecl *ConstructedBase = nullptr;
UsingDecl *ConstructedBaseUsing = nullptr;
// Find the set of such base class subobjects and check that there's a
// unique constructed subobject.
for (auto *D : Shadow->redecls()) {
auto *DShadow = cast<ConstructorUsingShadowDecl>(D);
auto *DNominatedBase = DShadow->getNominatedBaseClass();
auto *DConstructedBase = DShadow->getConstructedBaseClass();
InheritedFromBases.insert(
std::make_pair(DNominatedBase->getCanonicalDecl(),
DShadow->getNominatedBaseClassShadowDecl()));
if (DShadow->constructsVirtualBase())
InheritedFromBases.insert(
std::make_pair(DConstructedBase->getCanonicalDecl(),
DShadow->getConstructedBaseClassShadowDecl()));
else
assert(DNominatedBase == DConstructedBase);
// [class.inhctor.init]p2:
// If the constructor was inherited from multiple base class subobjects
// of type B, the program is ill-formed.
if (!ConstructedBase) {
ConstructedBase = DConstructedBase;
ConstructedBaseUsing = D->getUsingDecl();
} else if (ConstructedBase != DConstructedBase &&
!Shadow->isInvalidDecl()) {
if (!DiagnosedMultipleConstructedBases) {
S.Diag(UseLoc, diag::err_ambiguous_inherited_constructor)
<< Shadow->getTargetDecl();
S.Diag(ConstructedBaseUsing->getLocation(),
diag::note_ambiguous_inherited_constructor_using)
<< ConstructedBase;
DiagnosedMultipleConstructedBases = true;
}
S.Diag(D->getUsingDecl()->getLocation(),
diag::note_ambiguous_inherited_constructor_using)
<< DConstructedBase;
}
}
if (DiagnosedMultipleConstructedBases)
Shadow->setInvalidDecl();
}
/// Find the constructor to use for inherited construction of a base class,
/// and whether that base class constructor inherits the constructor from a
/// virtual base class (in which case it won't actually invoke it).
std::pair<CXXConstructorDecl *, bool>
findConstructorForBase(CXXRecordDecl *Base, CXXConstructorDecl *Ctor) const {
auto It = InheritedFromBases.find(Base->getCanonicalDecl());
if (It == InheritedFromBases.end())
return std::make_pair(nullptr, false);
// This is an intermediary class.
if (It->second)
return std::make_pair(
S.findInheritingConstructor(UseLoc, Ctor, It->second),
It->second->constructsVirtualBase());
// This is the base class from which the constructor was inherited.
return std::make_pair(Ctor, false);
}
};
/// Is the special member function which would be selected to perform the
/// specified operation on the specified class type a constexpr constructor?
static bool
specialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
Sema::CXXSpecialMember CSM, unsigned Quals,
bool ConstRHS,
CXXConstructorDecl *InheritedCtor = nullptr,
Sema::InheritedConstructorInfo *Inherited = nullptr) {
// If we're inheriting a constructor, see if we need to call it for this base
// class.
if (InheritedCtor) {
assert(CSM == Sema::CXXDefaultConstructor);
auto BaseCtor =
Inherited->findConstructorForBase(ClassDecl, InheritedCtor).first;
if (BaseCtor)
return BaseCtor->isConstexpr();
}
if (CSM == Sema::CXXDefaultConstructor)
return ClassDecl->hasConstexprDefaultConstructor();
Sema::SpecialMemberOverloadResult SMOR =
lookupCallFromSpecialMember(S, ClassDecl, CSM, Quals, ConstRHS);
if (!SMOR.getMethod())
// A constructor we wouldn't select can't be "involved in initializing"
// anything.
return true;
return SMOR.getMethod()->isConstexpr();
}
/// Determine whether the specified special member function would be constexpr
/// if it were implicitly defined.
static bool defaultedSpecialMemberIsConstexpr(
Sema &S, CXXRecordDecl *ClassDecl, Sema::CXXSpecialMember CSM,
bool ConstArg, CXXConstructorDecl *InheritedCtor = nullptr,
Sema::InheritedConstructorInfo *Inherited = nullptr) {
if (!S.getLangOpts().CPlusPlus11)
return false;
// C++11 [dcl.constexpr]p4:
// In the definition of a constexpr constructor [...]
bool Ctor = true;
switch (CSM) {
case Sema::CXXDefaultConstructor:
if (Inherited)
break;
// Since default constructor lookup is essentially trivial (and cannot
// involve, for instance, template instantiation), we compute whether a
// defaulted default constructor is constexpr directly within CXXRecordDecl.
//
// This is important for performance; we need to know whether the default
// constructor is constexpr to determine whether the type is a literal type.
return ClassDecl->defaultedDefaultConstructorIsConstexpr();
case Sema::CXXCopyConstructor:
case Sema::CXXMoveConstructor:
// For copy or move constructors, we need to perform overload resolution.
break;
case Sema::CXXCopyAssignment:
case Sema::CXXMoveAssignment:
if (!S.getLangOpts().CPlusPlus14)
return false;
// In C++1y, we need to perform overload resolution.
Ctor = false;
break;
case Sema::CXXDestructor:
case Sema::CXXInvalid:
return false;
}
// -- if the class is a non-empty union, or for each non-empty anonymous
// union member of a non-union class, exactly one non-static data member
// shall be initialized; [DR1359]
//
// If we squint, this is guaranteed, since exactly one non-static data member
// will be initialized (if the constructor isn't deleted), we just don't know
// which one.
if (Ctor && ClassDecl->isUnion())
return CSM == Sema::CXXDefaultConstructor
? ClassDecl->hasInClassInitializer() ||
!ClassDecl->hasVariantMembers()
: true;
// -- the class shall not have any virtual base classes;
if (Ctor && ClassDecl->getNumVBases())
return false;
// C++1y [class.copy]p26:
// -- [the class] is a literal type, and
if (!Ctor && !ClassDecl->isLiteral())
return false;
// -- every constructor involved in initializing [...] base class
// sub-objects shall be a constexpr constructor;
// -- the assignment operator selected to copy/move each direct base
// class is a constexpr function, and
for (const auto &B : ClassDecl->bases()) {
const RecordType *BaseType = B.getType()->getAs<RecordType>();
if (!BaseType) continue;
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg,
InheritedCtor, Inherited))
return false;
}
// -- every constructor involved in initializing non-static data members
// [...] shall be a constexpr constructor;
// -- every non-static data member and base class sub-object shall be
// initialized
// -- for each non-static data member of X that is of class type (or array
// thereof), the assignment operator selected to copy/move that member is
// a constexpr function
for (const auto *F : ClassDecl->fields()) {
if (F->isInvalidDecl())
continue;
if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer())
continue;
QualType BaseType = S.Context.getBaseElementType(F->getType());
if (const RecordType *RecordTy = BaseType->getAs<RecordType>()) {
CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM,
BaseType.getCVRQualifiers(),
ConstArg && !F->isMutable()))
return false;
} else if (CSM == Sema::CXXDefaultConstructor) {
return false;
}
}
// All OK, it's constexpr!
return true;
}
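// Two small cases of the rules above (a sketch, assuming C++11):
//
//   struct A { int x = 0; }; // defaulted default ctor is constexpr: the
//                            // in-class initializer is a constant expression
//   struct B : virtual A {}; // virtual base class: the defaulted default
//                            // ctor is never constexpr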
static Sema::ImplicitExceptionSpecification
ComputeDefaultedSpecialMemberExceptionSpec(
Sema &S, SourceLocation Loc, CXXMethodDecl *MD, Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI);
static Sema::ImplicitExceptionSpecification
computeImplicitExceptionSpec(Sema &S, SourceLocation Loc, CXXMethodDecl *MD) {
auto CSM = S.getSpecialMember(MD);
if (CSM != Sema::CXXInvalid)
return ComputeDefaultedSpecialMemberExceptionSpec(S, Loc, MD, CSM, nullptr);
auto *CD = cast<CXXConstructorDecl>(MD);
assert(CD->getInheritedConstructor() &&
"only special members have implicit exception specs");
Sema::InheritedConstructorInfo ICI(
S, Loc, CD->getInheritedConstructor().getShadowDecl());
return ComputeDefaultedSpecialMemberExceptionSpec(
S, Loc, CD, Sema::CXXDefaultConstructor, &ICI);
}
static FunctionProtoType::ExtProtoInfo getImplicitMethodEPI(Sema &S,
CXXMethodDecl *MD) {
FunctionProtoType::ExtProtoInfo EPI;
// Build an exception specification pointing back at this member.
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = MD;
// Set the calling convention to the default for C++ instance methods.
EPI.ExtInfo = EPI.ExtInfo.withCallingConv(
S.Context.getDefaultCallingConvention(/*IsVariadic=*/false,
/*IsCXXMethod=*/true));
return EPI;
}
void Sema::EvaluateImplicitExceptionSpec(SourceLocation Loc, CXXMethodDecl *MD) {
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (FPT->getExceptionSpecType() != EST_Unevaluated)
return;
// Evaluate the exception specification.
auto IES = computeImplicitExceptionSpec(*this, Loc, MD);
auto ESI = IES.getExceptionSpec();
// Update the type of the special member to use it.
UpdateExceptionSpec(MD, ESI);
// A user-provided destructor can be defined outside the class. When that
// happens, be sure to update the exception specification on both
// declarations.
const FunctionProtoType *CanonicalFPT =
MD->getCanonicalDecl()->getType()->castAs<FunctionProtoType>();
if (CanonicalFPT->getExceptionSpecType() == EST_Unevaluated)
UpdateExceptionSpec(MD->getCanonicalDecl(), ESI);
}
void Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD) {
CXXRecordDecl *RD = MD->getParent();
CXXSpecialMember CSM = getSpecialMember(MD);
assert(MD->isExplicitlyDefaulted() && CSM != CXXInvalid &&
"not an explicitly-defaulted special member");
// Whether this was the first-declared instance of the constructor.
// This affects whether we implicitly add an exception spec and constexpr.
bool First = MD == MD->getCanonicalDecl();
bool HadError = false;
// C++11 [dcl.fct.def.default]p1:
// A function that is explicitly defaulted shall
// -- be a special member function (checked elsewhere),
// -- have the same type (except for ref-qualifiers, and except that a
// copy operation can take a non-const reference) as an implicit
// declaration, and
// -- not have default arguments.
unsigned ExpectedParams = 1;
if (CSM == CXXDefaultConstructor || CSM == CXXDestructor)
ExpectedParams = 0;
if (MD->getNumParams() != ExpectedParams) {
// This also checks for default arguments: a copy or move constructor with a
// default argument is classified as a default constructor, and assignment
// operations and destructors can't have default arguments.
Diag(MD->getLocation(), diag::err_defaulted_special_member_params)
<< CSM << MD->getSourceRange();
HadError = true;
} else if (MD->isVariadic()) {
Diag(MD->getLocation(), diag::err_defaulted_special_member_variadic)
<< CSM << MD->getSourceRange();
HadError = true;
}
const FunctionProtoType *Type = MD->getType()->getAs<FunctionProtoType>();
bool CanHaveConstParam = false;
if (CSM == CXXCopyConstructor)
CanHaveConstParam = RD->implicitCopyConstructorHasConstParam();
else if (CSM == CXXCopyAssignment)
CanHaveConstParam = RD->implicitCopyAssignmentHasConstParam();
QualType ReturnType = Context.VoidTy;
if (CSM == CXXCopyAssignment || CSM == CXXMoveAssignment) {
// Check for return type matching.
ReturnType = Type->getReturnType();
QualType ExpectedReturnType =
Context.getLValueReferenceType(Context.getTypeDeclType(RD));
if (!Context.hasSameType(ReturnType, ExpectedReturnType)) {
Diag(MD->getLocation(), diag::err_defaulted_special_member_return_type)
<< (CSM == CXXMoveAssignment) << ExpectedReturnType;
HadError = true;
}
// A defaulted special member cannot have cv-qualifiers.
if (Type->getTypeQuals()) {
Diag(MD->getLocation(), diag::err_defaulted_special_member_quals)
<< (CSM == CXXMoveAssignment) << getLangOpts().CPlusPlus14;
HadError = true;
}
}
// Check for parameter type matching.
QualType ArgType = ExpectedParams ? Type->getParamType(0) : QualType();
bool HasConstParam = false;
if (ExpectedParams && ArgType->isReferenceType()) {
// Argument must be reference to possibly-const T.
QualType ReferentType = ArgType->getPointeeType();
HasConstParam = ReferentType.isConstQualified();
if (ReferentType.isVolatileQualified()) {
Diag(MD->getLocation(),
diag::err_defaulted_special_member_volatile_param) << CSM;
HadError = true;
}
if (HasConstParam && !CanHaveConstParam) {
if (CSM == CXXCopyConstructor || CSM == CXXCopyAssignment) {
Diag(MD->getLocation(),
diag::err_defaulted_special_member_copy_const_param)
<< (CSM == CXXCopyAssignment);
// FIXME: Explain why this special member can't be const.
} else {
Diag(MD->getLocation(),
diag::err_defaulted_special_member_move_const_param)
<< (CSM == CXXMoveAssignment);
}
HadError = true;
}
} else if (ExpectedParams) {
// A copy assignment operator can take its argument by value, but a
// defaulted one cannot.
assert(CSM == CXXCopyAssignment && "unexpected non-ref argument");
Diag(MD->getLocation(), diag::err_defaulted_copy_assign_not_ref);
HadError = true;
}
// C++11 [dcl.fct.def.default]p2:
// An explicitly-defaulted function may be declared constexpr only if it
// would have been implicitly declared as constexpr,
// Do not apply this rule to members of class templates, since core issue 1358
// makes such functions always instantiate to constexpr functions. For
// functions which cannot be constexpr (for non-constructors in C++11 and for
// destructors in C++1y), this is checked elsewhere.
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, RD, CSM,
HasConstParam);
if ((getLangOpts().CPlusPlus14 ? !isa<CXXDestructorDecl>(MD)
: isa<CXXConstructorDecl>(MD)) &&
MD->isConstexpr() && !Constexpr &&
MD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) {
Diag(MD->getLocStart(), diag::err_incorrect_defaulted_constexpr) << CSM;
// FIXME: Explain why the special member can't be constexpr.
HadError = true;
}
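// For instance (illustrative, not from the original source):
// struct A { int n; constexpr A() = default; };
// is ill-formed in C++11: the implicitly-defined default constructor would
// not be constexpr, because A::n would be left uninitialized.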
// and may have an explicit exception-specification only if it is compatible
// with the exception-specification on the implicit declaration.
if (Type->hasExceptionSpec()) {
// Delay the check if this is the first declaration of the special member,
// since we may not have parsed some necessary in-class initializers yet.
if (First) {
// If the exception specification needs to be instantiated, do so now,
// before we clobber it with an EST_Unevaluated specification below.
if (Type->getExceptionSpecType() == EST_Uninstantiated) {
InstantiateExceptionSpec(MD->getLocStart(), MD);
Type = MD->getType()->getAs<FunctionProtoType>();
}
DelayedDefaultedMemberExceptionSpecs.push_back(std::make_pair(MD, Type));
} else
CheckExplicitlyDefaultedMemberExceptionSpec(MD, Type);
}
// If a function is explicitly defaulted on its first declaration,
if (First) {
// -- it is implicitly considered to be constexpr if the implicit
// definition would be,
MD->setConstexpr(Constexpr);
// -- it is implicitly considered to have the same exception-specification
// as if it had been implicitly declared,
FunctionProtoType::ExtProtoInfo EPI = Type->getExtProtoInfo();
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = MD;
MD->setType(Context.getFunctionType(ReturnType,
llvm::makeArrayRef(&ArgType,
ExpectedParams),
EPI));
}
if (ShouldDeleteSpecialMember(MD, CSM)) {
if (First) {
SetDeclDeleted(MD, MD->getLocation());
} else {
// C++11 [dcl.fct.def.default]p4:
// [For a] user-provided explicitly-defaulted function [...] if such a
// function is implicitly defined as deleted, the program is ill-formed.
Diag(MD->getLocation(), diag::err_out_of_line_default_deletes) << CSM;
ShouldDeleteSpecialMember(MD, CSM, nullptr, /*Diagnose*/true);
HadError = true;
}
}
if (HadError)
MD->setInvalidDecl();
}
/// Check whether the exception specification provided for an
/// explicitly-defaulted special member matches the exception specification
/// that would have been generated for an implicit special member, per
/// C++11 [dcl.fct.def.default]p2.
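/// For example (illustrative, not from the original source):
/// struct B { ~B() throw(int) = default; };
/// is ill-formed: the implicit destructor would be noexcept, and throw(int)
/// is not compatible with that specification.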
void Sema::CheckExplicitlyDefaultedMemberExceptionSpec(
CXXMethodDecl *MD, const FunctionProtoType *SpecifiedType) {
// If the exception specification was explicitly specified but hadn't been
// parsed when the method was defaulted, grab it now.
if (SpecifiedType->getExceptionSpecType() == EST_Unparsed)
SpecifiedType =
MD->getTypeSourceInfo()->getType()->castAs<FunctionProtoType>();
// Compute the implicit exception specification.
CallingConv CC = Context.getDefaultCallingConvention(/*IsVariadic=*/false,
/*IsCXXMethod=*/true);
FunctionProtoType::ExtProtoInfo EPI(CC);
auto IES = computeImplicitExceptionSpec(*this, MD->getLocation(), MD);
EPI.ExceptionSpec = IES.getExceptionSpec();
const FunctionProtoType *ImplicitType = cast<FunctionProtoType>(
Context.getFunctionType(Context.VoidTy, None, EPI));
// Ensure that it matches.
CheckEquivalentExceptionSpec(
PDiag(diag::err_incorrect_defaulted_exception_spec)
<< getSpecialMember(MD), PDiag(),
ImplicitType, SourceLocation(),
SpecifiedType, MD->getLocation());
}
void Sema::CheckDelayedMemberExceptionSpecs() {
decltype(DelayedExceptionSpecChecks) Checks;
decltype(DelayedDefaultedMemberExceptionSpecs) Specs;
std::swap(Checks, DelayedExceptionSpecChecks);
std::swap(Specs, DelayedDefaultedMemberExceptionSpecs);
// Perform any deferred checking of exception specifications for virtual
// destructors.
for (auto &Check : Checks)
CheckOverridingFunctionExceptionSpec(Check.first, Check.second);
// Check that any explicitly-defaulted methods have exception specifications
// compatible with their implicit exception specifications.
for (auto &Spec : Specs)
CheckExplicitlyDefaultedMemberExceptionSpec(Spec.first, Spec.second);
}
namespace {
/// CRTP base class for visiting operations performed by a special member
/// function (or inherited constructor).
template<typename Derived>
struct SpecialMemberVisitor {
Sema &S;
CXXMethodDecl *MD;
Sema::CXXSpecialMember CSM;
Sema::InheritedConstructorInfo *ICI;
// Properties of the special member, computed for convenience.
bool IsConstructor = false, IsAssignment = false, ConstArg = false;
SpecialMemberVisitor(Sema &S, CXXMethodDecl *MD, Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI)
: S(S), MD(MD), CSM(CSM), ICI(ICI) {
switch (CSM) {
case Sema::CXXDefaultConstructor:
case Sema::CXXCopyConstructor:
case Sema::CXXMoveConstructor:
IsConstructor = true;
break;
case Sema::CXXCopyAssignment:
case Sema::CXXMoveAssignment:
IsAssignment = true;
break;
case Sema::CXXDestructor:
break;
case Sema::CXXInvalid:
llvm_unreachable("invalid special member kind");
}
if (MD->getNumParams()) {
if (const ReferenceType *RT =
MD->getParamDecl(0)->getType()->getAs<ReferenceType>())
ConstArg = RT->getPointeeType().isConstQualified();
}
}
Derived &getDerived() { return static_cast<Derived&>(*this); }
/// Is this a "move" special member?
bool isMove() const {
return CSM == Sema::CXXMoveConstructor || CSM == Sema::CXXMoveAssignment;
}
/// Look up the corresponding special member in the given class.
Sema::SpecialMemberOverloadResult lookupIn(CXXRecordDecl *Class,
unsigned Quals, bool IsMutable) {
return lookupCallFromSpecialMember(S, Class, CSM, Quals,
ConstArg && !IsMutable);
}
/// Look up the constructor for the specified base class to see if it's
/// overridden due to this being an inherited constructor.
Sema::SpecialMemberOverloadResult lookupInheritedCtor(CXXRecordDecl *Class) {
if (!ICI)
return {};
assert(CSM == Sema::CXXDefaultConstructor);
auto *BaseCtor =
cast<CXXConstructorDecl>(MD)->getInheritedConstructor().getConstructor();
if (auto *MD = ICI->findConstructorForBase(Class, BaseCtor).first)
return MD;
return {};
}
/// A base or member subobject.
typedef llvm::PointerUnion<CXXBaseSpecifier*, FieldDecl*> Subobject;
/// Get the location to use for a subobject in diagnostics.
static SourceLocation getSubobjectLoc(Subobject Subobj) {
// FIXME: For an indirect virtual base, the direct base leading to
// the indirect virtual base would be a more useful choice.
if (auto *B = Subobj.dyn_cast<CXXBaseSpecifier*>())
return B->getBaseTypeLoc();
else
return Subobj.get<FieldDecl*>()->getLocation();
}
enum BasesToVisit {
/// Visit all non-virtual (direct) bases.
VisitNonVirtualBases,
/// Visit all direct bases, virtual or not.
VisitDirectBases,
/// Visit all non-virtual bases, and all virtual bases if the class
/// is not abstract.
VisitPotentiallyConstructedBases,
/// Visit all direct or virtual bases.
VisitAllBases
};
// Visit the bases and members of the class.
bool visit(BasesToVisit Bases) {
CXXRecordDecl *RD = MD->getParent();
if (Bases == VisitPotentiallyConstructedBases)
Bases = RD->isAbstract() ? VisitNonVirtualBases : VisitAllBases;
for (auto &B : RD->bases())
if ((Bases == VisitDirectBases || !B.isVirtual()) &&
getDerived().visitBase(&B))
return true;
if (Bases == VisitAllBases)
for (auto &B : RD->vbases())
if (getDerived().visitBase(&B))
return true;
for (auto *F : RD->fields())
if (!F->isInvalidDecl() && !F->isUnnamedBitfield() &&
getDerived().visitField(F))
return true;
return false;
}
};
}
namespace {
struct SpecialMemberDeletionInfo
: SpecialMemberVisitor<SpecialMemberDeletionInfo> {
bool Diagnose;
SourceLocation Loc;
bool AllFieldsAreConst;
SpecialMemberDeletionInfo(Sema &S, CXXMethodDecl *MD,
Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI, bool Diagnose)
: SpecialMemberVisitor(S, MD, CSM, ICI), Diagnose(Diagnose),
Loc(MD->getLocation()), AllFieldsAreConst(true) {}
bool inUnion() const { return MD->getParent()->isUnion(); }
Sema::CXXSpecialMember getEffectiveCSM() {
return ICI ? Sema::CXXInvalid : CSM;
}
bool visitBase(CXXBaseSpecifier *Base) { return shouldDeleteForBase(Base); }
bool visitField(FieldDecl *Field) { return shouldDeleteForField(Field); }
bool shouldDeleteForBase(CXXBaseSpecifier *Base);
bool shouldDeleteForField(FieldDecl *FD);
bool shouldDeleteForAllConstMembers();
bool shouldDeleteForClassSubobject(CXXRecordDecl *Class, Subobject Subobj,
unsigned Quals);
bool shouldDeleteForSubobjectCall(Subobject Subobj,
Sema::SpecialMemberOverloadResult SMOR,
bool IsDtorCallInCtor);
bool isAccessible(Subobject Subobj, CXXMethodDecl *D);
};
}
/// Is the given special member accessible when used on the given
/// subobject?
bool SpecialMemberDeletionInfo::isAccessible(Subobject Subobj,
CXXMethodDecl *target) {
// If we're operating on a base class, the object type is the type of
// the class containing this special member.
QualType objectTy;
AccessSpecifier access = target->getAccess();
if (CXXBaseSpecifier *base = Subobj.dyn_cast<CXXBaseSpecifier*>()) {
objectTy = S.Context.getTypeDeclType(MD->getParent());
access = CXXRecordDecl::MergeAccess(base->getAccessSpecifier(), access);
// If we're operating on a field, the object type is the type of the field.
} else {
objectTy = S.Context.getTypeDeclType(target->getParent());
}
return S.isSpecialMemberAccessibleForDeletion(target, access, objectTy);
}
/// Check whether we should delete a special member due to the implicit
/// definition containing a call to a special member of a subobject.
bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
Subobject Subobj, Sema::SpecialMemberOverloadResult SMOR,
bool IsDtorCallInCtor) {
CXXMethodDecl *Decl = SMOR.getMethod();
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
int DiagKind = -1;
if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::NoMemberOrDeleted)
DiagKind = !Decl ? 0 : 1;
else if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::Ambiguous)
DiagKind = 2;
else if (!isAccessible(Subobj, Decl))
DiagKind = 3;
else if (!IsDtorCallInCtor && Field && Field->getParent()->isUnion() &&
!Decl->isTrivial()) {
// A member of a union must have a trivial corresponding special member.
// As a weird special case, a destructor call from a union's constructor
// must be accessible and non-deleted, but need not be trivial. Such a
// destructor is never actually called, but is semantically checked as
// if it were.
DiagKind = 4;
}
if (DiagKind == -1)
return false;
if (Diagnose) {
if (Field) {
S.Diag(Field->getLocation(),
diag::note_deleted_special_member_class_subobject)
<< getEffectiveCSM() << MD->getParent() << /*IsField*/true
<< Field << DiagKind << IsDtorCallInCtor;
} else {
CXXBaseSpecifier *Base = Subobj.get<CXXBaseSpecifier*>();
S.Diag(Base->getLocStart(),
diag::note_deleted_special_member_class_subobject)
<< getEffectiveCSM() << MD->getParent() << /*IsField*/false
<< Base->getType() << DiagKind << IsDtorCallInCtor;
}
if (DiagKind == 1)
S.NoteDeletedFunction(Decl);
// FIXME: Explain inaccessibility if DiagKind == 3.
}
return true;
}
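// An illustrative case for the union rule above (not from the original
// source):
// struct NT { NT(); }; // non-trivial default constructor
// union U { NT nt; }; // U's default constructor is implicitly deleted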
/// Check whether we should delete a special member function due to having a
/// direct or virtual base class or non-static data member of class type M.
bool SpecialMemberDeletionInfo::shouldDeleteForClassSubobject(
CXXRecordDecl *Class, Subobject Subobj, unsigned Quals) {
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
bool IsMutable = Field && Field->isMutable();
// C++11 [class.ctor]p5:
// -- any direct or virtual base class, or non-static data member with no
// brace-or-equal-initializer, has class type M (or array thereof) and
// either M has no default constructor or overload resolution as applied
// to M's default constructor results in an ambiguity or in a function
// that is deleted or inaccessible
// C++11 [class.copy]p11, C++11 [class.copy]p23:
// -- a direct or virtual base class B that cannot be copied/moved because
// overload resolution, as applied to B's corresponding special member,
// results in an ambiguity or a function that is deleted or inaccessible
// from the defaulted special member
// C++11 [class.dtor]p5:
// -- any direct or virtual base class [...] has a type with a destructor
// that is deleted or inaccessible
if (!(CSM == Sema::CXXDefaultConstructor &&
Field && Field->hasInClassInitializer()) &&
shouldDeleteForSubobjectCall(Subobj, lookupIn(Class, Quals, IsMutable),
false))
return true;
// C++11 [class.ctor]p5, C++11 [class.copy]p11:
// -- any direct or virtual base class or non-static data member has a
// type with a destructor that is deleted or inaccessible
if (IsConstructor) {
Sema::SpecialMemberOverloadResult SMOR =
S.LookupSpecialMember(Class, Sema::CXXDestructor,
false, false, false, false, false);
if (shouldDeleteForSubobjectCall(Subobj, SMOR, true))
return true;
}
return false;
}
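// For example (illustrative, not from the original source):
// struct D { D() = delete; };
// struct E { D d; }; // E::E() is implicitly deleted
// class F { ~F(); }; // private destructor
// struct G { F f; }; // G::~G() and G::G() are implicitly deleted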
/// Check whether we should delete a special member function due to the class
/// having a particular direct or virtual base class.
bool SpecialMemberDeletionInfo::shouldDeleteForBase(CXXBaseSpecifier *Base) {
CXXRecordDecl *BaseClass = Base->getType()->getAsCXXRecordDecl();
// If the program is correct, BaseClass cannot be null; if it is, the error
// must be reported elsewhere.
if (!BaseClass)
return false;
// If we have an inheriting constructor, check whether we're calling an
// inherited constructor instead of a default constructor.
Sema::SpecialMemberOverloadResult SMOR = lookupInheritedCtor(BaseClass);
if (auto *BaseCtor = SMOR.getMethod()) {
// Note that we do not check access along this path; other than that,
// this is the same as shouldDeleteForSubobjectCall(Base, BaseCtor, false);
// FIXME: Check that the base has a usable destructor! Sink this into
// shouldDeleteForClassSubobject.
if (BaseCtor->isDeleted() && Diagnose) {
S.Diag(Base->getLocStart(),
diag::note_deleted_special_member_class_subobject)
<< getEffectiveCSM() << MD->getParent() << /*IsField*/false
<< Base->getType() << /*Deleted*/1 << /*IsDtorCallInCtor*/false;
S.NoteDeletedFunction(BaseCtor);
}
return BaseCtor->isDeleted();
}
return shouldDeleteForClassSubobject(BaseClass, Base, 0);
}
/// Check whether we should delete a special member function due to the class
/// having a particular non-static data member.
bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
QualType FieldType = S.Context.getBaseElementType(FD->getType());
CXXRecordDecl *FieldRecord = FieldType->getAsCXXRecordDecl();
if (CSM == Sema::CXXDefaultConstructor) {
// For a default constructor, all references must be initialized in-class
// and, if a union, it must have a non-const member.
if (FieldType->isReferenceType() && !FD->hasInClassInitializer()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
<< !!ICI << MD->getParent() << FD << FieldType << /*Reference*/0;
return true;
}
// C++11 [class.ctor]p5: any non-variant non-static data member of
// const-qualified type (or array thereof) with no
// brace-or-equal-initializer does not have a user-provided default
// constructor.
if (!inUnion() && FieldType.isConstQualified() &&
!FD->hasInClassInitializer() &&
(!FieldRecord || !FieldRecord->hasUserProvidedDefaultConstructor())) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
<< !!ICI << MD->getParent() << FD << FD->getType() << /*Const*/1;
return true;
}
if (inUnion() && !FieldType.isConstQualified())
AllFieldsAreConst = false;
} else if (CSM == Sema::CXXCopyConstructor) {
// For a copy constructor, data members must not be of rvalue reference
// type.
if (FieldType->isRValueReferenceType()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_copy_ctor_rvalue_reference)
<< MD->getParent() << FD << FieldType;
return true;
}
} else if (IsAssignment) {
// For an assignment operator, data members must not be of reference type.
if (FieldType->isReferenceType()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_assign_field)
<< isMove() << MD->getParent() << FD << FieldType << /*Reference*/0;
return true;
}
if (!FieldRecord && FieldType.isConstQualified()) {
// C++11 [class.copy]p23:
// -- a non-static data member of const non-class type (or array thereof)
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_assign_field)
<< isMove() << MD->getParent() << FD << FD->getType() << /*Const*/1;
return true;
}
}
if (FieldRecord) {
// Some additional restrictions exist on the variant members.
if (!inUnion() && FieldRecord->isUnion() &&
FieldRecord->isAnonymousStructOrUnion()) {
bool AllVariantFieldsAreConst = true;
// FIXME: Handle anonymous unions declared within anonymous unions.
for (auto *UI : FieldRecord->fields()) {
QualType UnionFieldType = S.Context.getBaseElementType(UI->getType());
if (!UnionFieldType.isConstQualified())
AllVariantFieldsAreConst = false;
CXXRecordDecl *UnionFieldRecord = UnionFieldType->getAsCXXRecordDecl();
if (UnionFieldRecord &&
shouldDeleteForClassSubobject(UnionFieldRecord, UI,
UnionFieldType.getCVRQualifiers()))
return true;
}
// At least one member in each anonymous union must be non-const
if (CSM == Sema::CXXDefaultConstructor && AllVariantFieldsAreConst &&
!FieldRecord->field_empty()) {
if (Diagnose)
S.Diag(FieldRecord->getLocation(),
diag::note_deleted_default_ctor_all_const)
<< !!ICI << MD->getParent() << /*anonymous union*/1;
return true;
}
// Don't check the implicit member of the anonymous union type.
// This is technically non-conformant, but sanity demands it.
return false;
}
if (shouldDeleteForClassSubobject(FieldRecord, FD,
FieldType.getCVRQualifiers()))
return true;
}
return false;
}
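// Illustrative examples of fields that delete special members (not from the
// original source):
// struct F1 { int &r; }; // default ctor deleted: uninitialized reference
// struct F2 { const int c; }; // default ctor deleted: const, no initializer
// struct F3 { int &&rr; }; // copy ctor deleted: rvalue reference member
// struct F4 { int &r; F4(int &x) : r(x) {} }; // copy assignment deleted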
/// C++11 [class.ctor] p5:
/// A defaulted default constructor for a class X is defined as deleted if
/// X is a union and all of its variant members are of const-qualified type.
bool SpecialMemberDeletionInfo::shouldDeleteForAllConstMembers() {
// This is a silly definition, because it gives an empty union a deleted
// default constructor. Don't do that.
if (CSM == Sema::CXXDefaultConstructor && inUnion() && AllFieldsAreConst) {
bool AnyFields = false;
for (auto *F : MD->getParent()->fields())
if ((AnyFields = !F->isUnnamedBitfield()))
break;
if (!AnyFields)
return false;
if (Diagnose)
S.Diag(MD->getParent()->getLocation(),
diag::note_deleted_default_ctor_all_const)
<< !!ICI << MD->getParent() << /*not anonymous union*/0;
return true;
}
return false;
}
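// e.g. (illustrative, not from the original source):
// union U { const int k; }; // U's default constructor is deleted, since
// every variant member is of const-qualified type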
/// Determine whether a defaulted special member function should be defined as
/// deleted, as specified in C++11 [class.ctor]p5, C++11 [class.copy]p11,
/// C++11 [class.copy]p23, and C++11 [class.dtor]p5.
bool Sema::ShouldDeleteSpecialMember(CXXMethodDecl *MD, CXXSpecialMember CSM,
InheritedConstructorInfo *ICI,
bool Diagnose) {
if (MD->isInvalidDecl())
return false;
CXXRecordDecl *RD = MD->getParent();
assert(!RD->isDependentType() && "do deletion after instantiation");
if (!LangOpts.CPlusPlus11 || RD->isInvalidDecl())
return false;
// C++11 [expr.lambda.prim]p19:
// The closure type associated with a lambda-expression has a
// deleted (8.4.3) default constructor and a deleted copy
// assignment operator.
if (RD->isLambda() &&
(CSM == CXXDefaultConstructor || CSM == CXXCopyAssignment)) {
if (Diagnose)
Diag(RD->getLocation(), diag::note_lambda_decl);
return true;
}
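// For example (illustrative, not from the original source):
// auto L = []{};
// decltype(L) M; // error: the closure type's default constructor is deleted
// L = []{}; // error: the closure type's copy assignment is deleted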
// For an anonymous struct or union, the copy and assignment special members
// will never be used, so skip the check. For an anonymous union declared at
// namespace scope, the constructor and destructor are used.
if (CSM != CXXDefaultConstructor && CSM != CXXDestructor &&
RD->isAnonymousStructOrUnion())
return false;
// C++11 [class.copy]p7, p18:
// If the class definition declares a move constructor or move assignment
// operator, an implicitly declared copy constructor or copy assignment
// operator is defined as deleted.
if (MD->isImplicit() &&
(CSM == CXXCopyConstructor || CSM == CXXCopyAssignment)) {
CXXMethodDecl *UserDeclaredMove = nullptr;
// In Microsoft mode up to MSVC 2013, a user-declared move only causes the
// deletion of the corresponding copy operation, not both copy operations.
// MSVC 2015 adopted the standards-conforming behavior.
bool DeletesOnlyMatchingCopy =
getLangOpts().MSVCCompat &&
!getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015);
if (RD->hasUserDeclaredMoveConstructor() &&
(!DeletesOnlyMatchingCopy || CSM == CXXCopyConstructor)) {
if (!Diagnose) return true;
// Find any user-declared move constructor.
for (auto *I : RD->ctors()) {
if (I->isMoveConstructor()) {
UserDeclaredMove = I;
break;
}
}
assert(UserDeclaredMove);
} else if (RD->hasUserDeclaredMoveAssignment() &&
(!DeletesOnlyMatchingCopy || CSM == CXXCopyAssignment)) {
if (!Diagnose) return true;
// Find any user-declared move assignment operator.
for (auto *I : RD->methods()) {
if (I->isMoveAssignmentOperator()) {
UserDeclaredMove = I;
break;
}
}
assert(UserDeclaredMove);
}
if (UserDeclaredMove) {
Diag(UserDeclaredMove->getLocation(),
diag::note_deleted_copy_user_declared_move)
<< (CSM == CXXCopyAssignment) << RD
<< UserDeclaredMove->isMoveAssignmentOperator();
return true;
}
}
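// Illustrative example (not from the original source):
// struct M { M(M &&); }; // user-declared move constructor
// void f(const M &a) { M b(a); } // error: M's copy constructor is deleted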
// Do access control from the special member function
ContextRAII MethodContext(*this, MD);
// C++11 [class.dtor]p5:
// -- for a virtual destructor, lookup of the non-array deallocation function
// results in an ambiguity or in a function that is deleted or inaccessible
if (CSM == CXXDestructor && MD->isVirtual()) {
FunctionDecl *OperatorDelete = nullptr;
DeclarationName Name =
Context.DeclarationNames.getCXXOperatorName(OO_Delete);
if (FindDeallocationFunction(MD->getLocation(), MD->getParent(), Name,
OperatorDelete, /*Diagnose*/false)) {
if (Diagnose)
Diag(RD->getLocation(), diag::note_deleted_dtor_no_operator_delete);
return true;
}
}
SpecialMemberDeletionInfo SMI(*this, MD, CSM, ICI, Diagnose);
// Per DR1611, do not consider virtual bases of constructors of abstract
// classes, since we are not going to construct them.
// Per DR1658, do not consider virtual bases of destructors of abstract
// classes either.
// Per DR2180, for assignment operators we only assign (and thus only
// consider) direct bases.
if (SMI.visit(SMI.IsAssignment ? SMI.VisitDirectBases
: SMI.VisitPotentiallyConstructedBases))
return true;
if (SMI.shouldDeleteForAllConstMembers())
return true;
if (getLangOpts().CUDA) {
// We should delete the special member in CUDA mode if target inference
// failed.
return inferCUDATargetForImplicitSpecialMember(RD, CSM, MD, SMI.ConstArg,
Diagnose);
}
return false;
}
/// Perform lookup for a special member of the specified kind, and determine
/// whether it is trivial. If the triviality can be determined without the
/// lookup, skip it. This is intended for use when determining whether a
/// special member of a containing object is trivial, and thus does not ever
/// perform overload resolution for default constructors.
///
/// If \p Selected is not \c NULL, \c *Selected will be filled in with the
/// member that was most likely to be intended to be trivial, if any.
static bool findTrivialSpecialMember(Sema &S, CXXRecordDecl *RD,
Sema::CXXSpecialMember CSM, unsigned Quals,
bool ConstRHS, CXXMethodDecl **Selected) {
if (Selected)
*Selected = nullptr;
switch (CSM) {
case Sema::CXXInvalid:
llvm_unreachable("not a special member");
case Sema::CXXDefaultConstructor:
// C++11 [class.ctor]p5:
// A default constructor is trivial if:
// - all the [direct subobjects] have trivial default constructors
//
// Note, no overload resolution is performed in this case.
if (RD->hasTrivialDefaultConstructor())
return true;
if (Selected) {
// If there's a default constructor which could have been trivial, dig it
// out. Otherwise, if there's any user-provided default constructor, point
// to that as an example of why there's not a trivial one.
CXXConstructorDecl *DefCtor = nullptr;
if (RD->needsImplicitDefaultConstructor())
S.DeclareImplicitDefaultConstructor(RD);
for (auto *CI : RD->ctors()) {
if (!CI->isDefaultConstructor())
continue;
DefCtor = CI;
if (!DefCtor->isUserProvided())
break;
}
*Selected = DefCtor;
}
return false;
case Sema::CXXDestructor:
// C++11 [class.dtor]p5:
// A destructor is trivial if:
// - all the direct [subobjects] have trivial destructors
if (RD->hasTrivialDestructor())
return true;
if (Selected) {
if (RD->needsImplicitDestructor())
S.DeclareImplicitDestructor(RD);
*Selected = RD->getDestructor();
}
return false;
case Sema::CXXCopyConstructor:
// C++11 [class.copy]p12:
// A copy constructor is trivial if:
// - the constructor selected to copy each direct [subobject] is trivial
if (RD->hasTrivialCopyConstructor()) {
if (Quals == Qualifiers::Const)
// We must either select the trivial copy constructor or reach an
// ambiguity; no need to actually perform overload resolution.
return true;
} else if (!Selected) {
return false;
}
// In C++98, we are not supposed to perform overload resolution here, but we
// treat that as a language defect, as suggested on cxx-abi-dev, and treat
// classes like B below as having a non-trivial copy constructor:
// struct A { template<typename T> A(T&); };
// struct B { mutable A a; };
goto NeedOverloadResolution;
case Sema::CXXCopyAssignment:
// C++11 [class.copy]p25:
// A copy assignment operator is trivial if:
// - the assignment operator selected to copy each direct [subobject] is
// trivial
if (RD->hasTrivialCopyAssignment()) {
if (Quals == Qualifiers::Const)
return true;
} else if (!Selected) {
return false;
}
// In C++98, we are not supposed to perform overload resolution here, but we
// treat that as a language defect.
goto NeedOverloadResolution;
case Sema::CXXMoveConstructor:
case Sema::CXXMoveAssignment:
NeedOverloadResolution:
Sema::SpecialMemberOverloadResult SMOR =
lookupCallFromSpecialMember(S, RD, CSM, Quals, ConstRHS);
// The standard doesn't describe how to behave if the lookup is ambiguous.
// We treat it as not making the member non-trivial, just like the standard
// mandates for the default constructor. This should rarely matter, because
// the member will also be deleted.
if (SMOR.getKind() == Sema::SpecialMemberOverloadResult::Ambiguous)
return true;
if (!SMOR.getMethod()) {
assert(SMOR.getKind() ==
Sema::SpecialMemberOverloadResult::NoMemberOrDeleted);
return false;
}
// We deliberately don't check if we found a deleted special member. We're
// not supposed to!
if (Selected)
*Selected = SMOR.getMethod();
return SMOR.getMethod()->isTrivial();
}
llvm_unreachable("unknown special method kind");
}
static CXXConstructorDecl *findUserDeclaredCtor(CXXRecordDecl *RD) {
for (auto *CI : RD->ctors())
if (!CI->isImplicit())
return CI;
// Look for constructor templates.
typedef CXXRecordDecl::specific_decl_iterator<FunctionTemplateDecl> tmpl_iter;
for (tmpl_iter TI(RD->decls_begin()), TE(RD->decls_end()); TI != TE; ++TI) {
if (CXXConstructorDecl *CD =
dyn_cast<CXXConstructorDecl>(TI->getTemplatedDecl()))
return CD;
}
return nullptr;
}
/// The kind of subobject we are checking for triviality. The values of this
/// enumeration are used in diagnostics.
enum TrivialSubobjectKind {
/// The subobject is a base class.
TSK_BaseClass,
/// The subobject is a non-static data member.
TSK_Field,
/// The object is actually the complete object.
TSK_CompleteObject
};
/// Check whether the special member selected for a given type would be trivial.
static bool checkTrivialSubobjectCall(Sema &S, SourceLocation SubobjLoc,
QualType SubType, bool ConstRHS,
Sema::CXXSpecialMember CSM,
TrivialSubobjectKind Kind,
bool Diagnose) {
CXXRecordDecl *SubRD = SubType->getAsCXXRecordDecl();
if (!SubRD)
return true;
CXXMethodDecl *Selected;
if (findTrivialSpecialMember(S, SubRD, CSM, SubType.getCVRQualifiers(),
ConstRHS, Diagnose ? &Selected : nullptr))
return true;
if (Diagnose) {
if (ConstRHS)
SubType.addConst();
if (!Selected && CSM == Sema::CXXDefaultConstructor) {
S.Diag(SubobjLoc, diag::note_nontrivial_no_def_ctor)
<< Kind << SubType.getUnqualifiedType();
if (CXXConstructorDecl *CD = findUserDeclaredCtor(SubRD))
S.Diag(CD->getLocation(), diag::note_user_declared_ctor);
} else if (!Selected)
S.Diag(SubobjLoc, diag::note_nontrivial_no_copy)
<< Kind << SubType.getUnqualifiedType() << CSM << SubType;
else if (Selected->isUserProvided()) {
if (Kind == TSK_CompleteObject)
S.Diag(Selected->getLocation(), diag::note_nontrivial_user_provided)
<< Kind << SubType.getUnqualifiedType() << CSM;
else {
S.Diag(SubobjLoc, diag::note_nontrivial_user_provided)
<< Kind << SubType.getUnqualifiedType() << CSM;
S.Diag(Selected->getLocation(), diag::note_declared_at);
}
} else {
if (Kind != TSK_CompleteObject)
S.Diag(SubobjLoc, diag::note_nontrivial_subobject)
<< Kind << SubType.getUnqualifiedType() << CSM;
// Explain why the defaulted or deleted special member isn't trivial.
S.SpecialMemberIsTrivial(Selected, CSM, Diagnose);
}
}
return false;
}
/// Check whether the members of a class type allow a special member to be
/// trivial.
static bool checkTrivialClassMembers(Sema &S, CXXRecordDecl *RD,
Sema::CXXSpecialMember CSM,
bool ConstArg, bool Diagnose) {
for (const auto *FI : RD->fields()) {
if (FI->isInvalidDecl() || FI->isUnnamedBitfield())
continue;
QualType FieldType = S.Context.getBaseElementType(FI->getType());
// Pretend anonymous struct or union members are members of this class.
if (FI->isAnonymousStructOrUnion()) {
if (!checkTrivialClassMembers(S, FieldType->getAsCXXRecordDecl(),
CSM, ConstArg, Diagnose))
return false;
continue;
}
// C++11 [class.ctor]p5:
// A default constructor is trivial if [...]
// -- no non-static data member of its class has a
// brace-or-equal-initializer
if (CSM == Sema::CXXDefaultConstructor && FI->hasInClassInitializer()) {
if (Diagnose)
S.Diag(FI->getLocation(), diag::note_nontrivial_in_class_init) << FI;
return false;
}
// Objective-C ARC 4.3.5:
// [...] nontrivially ownership-qualified types are [...] not trivially
// default constructible, copy constructible, move constructible, copy
// assignable, move assignable, or destructible [...]
if (FieldType.hasNonTrivialObjCLifetime()) {
if (Diagnose)
S.Diag(FI->getLocation(), diag::note_nontrivial_objc_ownership)
<< RD << FieldType.getObjCLifetime();
return false;
}
bool ConstRHS = ConstArg && !FI->isMutable();
if (!checkTrivialSubobjectCall(S, FI->getLocation(), FieldType, ConstRHS,
CSM, TSK_Field, Diagnose))
return false;
}
return true;
}
/// Diagnose why the specified class does not have a trivial special member of
/// the given kind.
void Sema::DiagnoseNontrivial(const CXXRecordDecl *RD, CXXSpecialMember CSM) {
QualType Ty = Context.getRecordType(RD);
bool ConstArg = (CSM == CXXCopyConstructor || CSM == CXXCopyAssignment);
checkTrivialSubobjectCall(*this, RD->getLocation(), Ty, ConstArg, CSM,
TSK_CompleteObject, /*Diagnose*/true);
}
/// Determine whether a defaulted or deleted special member function is trivial,
/// as specified in C++11 [class.ctor]p5, C++11 [class.copy]p12,
/// C++11 [class.copy]p25, and C++11 [class.dtor]p5.
bool Sema::SpecialMemberIsTrivial(CXXMethodDecl *MD, CXXSpecialMember CSM,
bool Diagnose) {
assert(!MD->isUserProvided() && CSM != CXXInvalid && "not special enough");
CXXRecordDecl *RD = MD->getParent();
bool ConstArg = false;
// C++11 [class.copy]p12, p25: [DR1593]
// A [special member] is trivial if [...] its parameter-type-list is
// equivalent to the parameter-type-list of an implicit declaration [...]
switch (CSM) {
case CXXDefaultConstructor:
case CXXDestructor:
// Trivial default constructors and destructors cannot have parameters.
break;
case CXXCopyConstructor:
case CXXCopyAssignment: {
// Trivial copy operations always have const, non-volatile parameter types.
ConstArg = true;
const ParmVarDecl *Param0 = MD->getParamDecl(0);
const ReferenceType *RT = Param0->getType()->getAs<ReferenceType>();
if (!RT || RT->getPointeeType().getCVRQualifiers() != Qualifiers::Const) {
if (Diagnose)
Diag(Param0->getLocation(), diag::note_nontrivial_param_type)
<< Param0->getSourceRange() << Param0->getType()
<< Context.getLValueReferenceType(
Context.getRecordType(RD).withConst());
return false;
}
break;
}
case CXXMoveConstructor:
case CXXMoveAssignment: {
// Trivial move operations always have non-cv-qualified parameters.
const ParmVarDecl *Param0 = MD->getParamDecl(0);
const RValueReferenceType *RT =
Param0->getType()->getAs<RValueReferenceType>();
if (!RT || RT->getPointeeType().getCVRQualifiers()) {
if (Diagnose)
Diag(Param0->getLocation(), diag::note_nontrivial_param_type)
<< Param0->getSourceRange() << Param0->getType()
<< Context.getRValueReferenceType(Context.getRecordType(RD));
return false;
}
break;
}
case CXXInvalid:
llvm_unreachable("not a special member");
}
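// For example (illustrative, not from the original source), a subobject can
// force the copy constructor's parameter type to be 'X &' rather than
// 'const X &', making it non-trivial per the check above:
// struct A { A(A &); }; // copy constructor takes a non-const reference
// struct B { A a; }; // B's implicit copy constructor has type B(B &)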
if (MD->getMinRequiredArguments() < MD->getNumParams()) {
if (Diagnose)
Diag(MD->getParamDecl(MD->getMinRequiredArguments())->getLocation(),
diag::note_nontrivial_default_arg)
<< MD->getParamDecl(MD->getMinRequiredArguments())->getSourceRange();
return false;
}
if (MD->isVariadic()) {
if (Diagnose)
Diag(MD->getLocation(), diag::note_nontrivial_variadic);
return false;
}
// C++11 [class.ctor]p5, C++11 [class.dtor]p5:
// A copy/move [constructor or assignment operator] is trivial if
// -- the [member] selected to copy/move each direct base class subobject
// is trivial
//
// C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor or destructor] is trivial if
// -- all the direct base classes have trivial [default constructors or
// destructors]
for (const auto &BI : RD->bases())
if (!checkTrivialSubobjectCall(*this, BI.getLocStart(), BI.getType(),
ConstArg, CSM, TSK_BaseClass, Diagnose))
return false;
// C++11 [class.ctor]p5, C++11 [class.dtor]p5:
// A copy/move [constructor or assignment operator] for a class X is
// trivial if
// -- for each non-static data member of X that is of class type (or array
// thereof), the constructor selected to copy/move that member is
// trivial
//
// C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor or destructor] is trivial if
// -- for all of the non-static data members of its class that are of class
// type (or array thereof), each such class has a trivial [default
// constructor or destructor]
if (!checkTrivialClassMembers(*this, RD, CSM, ConstArg, Diagnose))
return false;
// C++11 [class.dtor]p5:
// A destructor is trivial if [...]
// -- the destructor is not virtual
if (CSM == CXXDestructor && MD->isVirtual()) {
if (Diagnose)
Diag(MD->getLocation(), diag::note_nontrivial_virtual_dtor) << RD;
return false;
}
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [special member] for class X is trivial if [...]
// -- class X has no virtual functions and no virtual base classes
if (CSM != CXXDestructor && MD->getParent()->isDynamicClass()) {
if (!Diagnose)
return false;
if (RD->getNumVBases()) {
// Check for virtual bases. We already know that the corresponding
// member in all bases is trivial, so vbases must all be direct.
CXXBaseSpecifier &BS = *RD->vbases_begin();
assert(BS.isVirtual());
Diag(BS.getLocStart(), diag::note_nontrivial_has_virtual) << RD << 1;
return false;
}
// Must have a virtual method.
for (const auto *MI : RD->methods()) {
if (MI->isVirtual()) {
SourceLocation MLoc = MI->getLocStart();
Diag(MLoc, diag::note_nontrivial_has_virtual) << RD << 0;
return false;
}
}
llvm_unreachable("dynamic class with no vbases and no virtual functions");
}
// Looks like it's trivial!
return true;
}
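// e.g. (illustrative, not from the original source):
// struct P { virtual ~P(); }; // ~P() is non-trivial: it is virtual
// struct Q : P { }; // Q is a dynamic class, so its constructors and
// copy/move operations are not trivial either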
namespace {
struct FindHiddenVirtualMethod {
Sema *S;
CXXMethodDecl *Method;
llvm::SmallPtrSet<const CXXMethodDecl *, 8> OverridenAndUsingBaseMethods;
SmallVector<CXXMethodDecl *, 8> OverloadedMethods;
private:
/// Check whether any most-overridden method from MD is in Methods.
static bool CheckMostOverridenMethods(
const CXXMethodDecl *MD,
const llvm::SmallPtrSetImpl<const CXXMethodDecl *> &Methods) {
if (MD->size_overridden_methods() == 0)
return Methods.count(MD->getCanonicalDecl());
for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
E = MD->end_overridden_methods();
I != E; ++I)
if (CheckMostOverridenMethods(*I, Methods))
return true;
return false;
}
public:
/// Member lookup function that determines whether a given C++
/// method overloads virtual methods in a base class without overriding any,
/// to be used with CXXRecordDecl::lookupInBases().
bool operator()(const CXXBaseSpecifier *Specifier, CXXBasePath &Path) {
RecordDecl *BaseRecord =
Specifier->getType()->getAs<RecordType>()->getDecl();
DeclarationName Name = Method->getDeclName();
assert(Name.getNameKind() == DeclarationName::Identifier);
bool foundSameNameMethod = false;
SmallVector<CXXMethodDecl *, 8> overloadedMethods;
for (Path.Decls = BaseRecord->lookup(Name); !Path.Decls.empty();
Path.Decls = Path.Decls.slice(1)) {
NamedDecl *D = Path.Decls.front();
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
MD = MD->getCanonicalDecl();
foundSameNameMethod = true;
// Interested only in hidden virtual methods.
if (!MD->isVirtual())
continue;
// If the method we are checking overrides a method from its base
// don't warn about the other overloaded methods. Clang deviates from
// GCC by only diagnosing overloads of inherited virtual functions that
// do not override any other virtual functions in the base. GCC's
// -Woverloaded-virtual diagnoses any derived function hiding a virtual
// function from a base class. These cases may be better served by a
// warning (not specific to virtual functions) on call sites when the
// call would select a different function from the base class, were it
// visible.
// See FIXME in test/SemaCXX/warn-overload-virtual.cpp for an example.
if (!S->IsOverload(Method, MD, false))
return true;
// Collect the overload only if it's hidden.
if (!CheckMostOverridenMethods(MD, OverridenAndUsingBaseMethods))
overloadedMethods.push_back(MD);
}
}
if (foundSameNameMethod)
OverloadedMethods.append(overloadedMethods.begin(),
overloadedMethods.end());
return foundSameNameMethod;
}
};
} // end anonymous namespace
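// An illustrative case for the warning implemented below (not from the
// original source):
// struct Base { virtual void f(int); };
// struct Derived : Base {
// void f(double); // warning: 'Derived::f' hides overloaded virtual function
// };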
/// \brief Add the most overridden methods from MD to Methods.
static void AddMostOverridenMethods(const CXXMethodDecl *MD,
llvm::SmallPtrSetImpl<const CXXMethodDecl *>& Methods) {
if (MD->size_overridden_methods() == 0)
Methods.insert(MD->getCanonicalDecl());
for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
E = MD->end_overridden_methods();
I != E; ++I)
AddMostOverridenMethods(*I, Methods);
}
/// \brief Check if a method overloads virtual methods in a base class without
/// overriding any.
void Sema::FindHiddenVirtualMethods(CXXMethodDecl *MD,
SmallVectorImpl<CXXMethodDecl*> &OverloadedMethods) {
if (!MD->getDeclName().isIdentifier())
return;
CXXBasePaths Paths(/*FindAmbiguities=*/true, // true to look in all bases.
/*bool RecordPaths=*/false,
/*bool DetectVirtual=*/false);
FindHiddenVirtualMethod FHVM;
FHVM.Method = MD;
FHVM.S = this;
// Keep the base methods that were overridden or introduced in the subclass
// by 'using' in a set. A base method not in this set is hidden.
CXXRecordDecl *DC = MD->getParent();
DeclContext::lookup_result R = DC->lookup(MD->getDeclName());
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
NamedDecl *ND = *I;
if (UsingShadowDecl *shad = dyn_cast<UsingShadowDecl>(*I))
ND = shad->getTargetDecl();
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND))
AddMostOverridenMethods(MD, FHVM.OverridenAndUsingBaseMethods);
}
if (DC->lookupInBases(FHVM, Paths))
OverloadedMethods = FHVM.OverloadedMethods;
}
void Sema::NoteHiddenVirtualMethods(CXXMethodDecl *MD,
SmallVectorImpl<CXXMethodDecl*> &OverloadedMethods) {
for (unsigned i = 0, e = OverloadedMethods.size(); i != e; ++i) {
CXXMethodDecl *overloadedMD = OverloadedMethods[i];
PartialDiagnostic PD = PDiag(
diag::note_hidden_overloaded_virtual_declared_here) << overloadedMD;
HandleFunctionTypeMismatch(PD, MD->getType(), overloadedMD->getType());
Diag(overloadedMD->getLocation(), PD);
}
}
/// \brief Diagnose methods which overload virtual methods in a base class
/// without overriding any.
void Sema::DiagnoseHiddenVirtualMethods(CXXMethodDecl *MD) {
if (MD->isInvalidDecl())
return;
if (Diags.isIgnored(diag::warn_overloaded_virtual, MD->getLocation()))
return;
SmallVector<CXXMethodDecl *, 8> OverloadedMethods;
FindHiddenVirtualMethods(MD, OverloadedMethods);
if (!OverloadedMethods.empty()) {
Diag(MD->getLocation(), diag::warn_overloaded_virtual)
<< MD << (OverloadedMethods.size() > 1);
NoteHiddenVirtualMethods(MD, OverloadedMethods);
}
}
void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
Decl *TagDecl,
SourceLocation LBrac,
SourceLocation RBrac,
AttributeList *AttrList) {
if (!TagDecl)
return;
AdjustDeclIfTemplate(TagDecl);
for (const AttributeList* l = AttrList; l; l = l->getNext()) {
if (l->getKind() != AttributeList::AT_Visibility)
continue;
l->setInvalid();
Diag(l->getLoc(), diag::warn_attribute_after_definition_ignored) <<
l->getName();
}
ActOnFields(S, RLoc, TagDecl, llvm::makeArrayRef(
// strict aliasing violation!
reinterpret_cast<Decl**>(FieldCollector->getCurFields()),
FieldCollector->getCurNumFields()), LBrac, RBrac, AttrList);
- CheckCompletedCXXClass(
- dyn_cast_or_null<CXXRecordDecl>(TagDecl));
+ CheckCompletedCXXClass(dyn_cast_or_null<CXXRecordDecl>(TagDecl));
}
/// AddImplicitlyDeclaredMembersToClass - Adds any implicitly-declared
/// special functions, such as the default constructor, copy
/// constructor, or destructor, to the given C++ class (C++
/// [special]p1). This routine can only be executed just before the
/// definition of the class is complete.
void Sema::AddImplicitlyDeclaredMembersToClass(CXXRecordDecl *ClassDecl) {
if (ClassDecl->needsImplicitDefaultConstructor()) {
++ASTContext::NumImplicitDefaultConstructors;
if (ClassDecl->hasInheritedConstructor())
DeclareImplicitDefaultConstructor(ClassDecl);
}
if (ClassDecl->needsImplicitCopyConstructor()) {
++ASTContext::NumImplicitCopyConstructors;
// If the properties or semantics of the copy constructor couldn't be
// determined while the class was being declared, force a declaration
// of it now.
if (ClassDecl->needsOverloadResolutionForCopyConstructor() ||
ClassDecl->hasInheritedConstructor())
DeclareImplicitCopyConstructor(ClassDecl);
// For the MS ABI we need to know whether the copy ctor is deleted. A
// prerequisite for deleting the implicit copy ctor is that the class has a
// move ctor or move assignment that is either user-declared or whose
// semantics are inherited from a subobject. FIXME: We should provide a more
// direct way for CodeGen to ask whether the constructor was deleted.
else if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
(ClassDecl->hasUserDeclaredMoveConstructor() ||
ClassDecl->needsOverloadResolutionForMoveConstructor() ||
ClassDecl->hasUserDeclaredMoveAssignment() ||
ClassDecl->needsOverloadResolutionForMoveAssignment()))
DeclareImplicitCopyConstructor(ClassDecl);
}
if (getLangOpts().CPlusPlus11 && ClassDecl->needsImplicitMoveConstructor()) {
++ASTContext::NumImplicitMoveConstructors;
if (ClassDecl->needsOverloadResolutionForMoveConstructor() ||
ClassDecl->hasInheritedConstructor())
DeclareImplicitMoveConstructor(ClassDecl);
}
if (ClassDecl->needsImplicitCopyAssignment()) {
++ASTContext::NumImplicitCopyAssignmentOperators;
// If we have a dynamic class, then the copy assignment operator may be
// virtual, so we have to declare it immediately. This ensures that, e.g.,
// it shows up in the right place in the vtable and that we diagnose
// problems with the implicit exception specification.
if (ClassDecl->isDynamicClass() ||
ClassDecl->needsOverloadResolutionForCopyAssignment() ||
ClassDecl->hasInheritedAssignment())
DeclareImplicitCopyAssignment(ClassDecl);
}
if (getLangOpts().CPlusPlus11 && ClassDecl->needsImplicitMoveAssignment()) {
++ASTContext::NumImplicitMoveAssignmentOperators;
// Likewise for the move assignment operator.
if (ClassDecl->isDynamicClass() ||
ClassDecl->needsOverloadResolutionForMoveAssignment() ||
ClassDecl->hasInheritedAssignment())
DeclareImplicitMoveAssignment(ClassDecl);
}
if (ClassDecl->needsImplicitDestructor()) {
++ASTContext::NumImplicitDestructors;
// If we have a dynamic class, then the destructor may be virtual, so we
// have to declare the destructor immediately. This ensures that, e.g., it
// shows up in the right place in the vtable and that we diagnose problems
// with the implicit exception specification.
if (ClassDecl->isDynamicClass() ||
ClassDecl->needsOverloadResolutionForDestructor())
DeclareImplicitDestructor(ClassDecl);
}
}
unsigned Sema::ActOnReenterTemplateScope(Scope *S, Decl *D) {
if (!D)
return 0;
// The order of template parameters is not important here. All names
// get added to the same scope.
SmallVector<TemplateParameterList *, 4> ParameterLists;
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
D = TD->getTemplatedDecl();
if (auto *PSD = dyn_cast<ClassTemplatePartialSpecializationDecl>(D))
ParameterLists.push_back(PSD->getTemplateParameters());
if (DeclaratorDecl *DD = dyn_cast<DeclaratorDecl>(D)) {
for (unsigned i = 0; i < DD->getNumTemplateParameterLists(); ++i)
ParameterLists.push_back(DD->getTemplateParameterList(i));
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
ParameterLists.push_back(FTD->getTemplateParameters());
}
}
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
for (unsigned i = 0; i < TD->getNumTemplateParameterLists(); ++i)
ParameterLists.push_back(TD->getTemplateParameterList(i));
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(TD)) {
if (ClassTemplateDecl *CTD = RD->getDescribedClassTemplate())
ParameterLists.push_back(CTD->getTemplateParameters());
}
}
unsigned Count = 0;
for (TemplateParameterList *Params : ParameterLists) {
if (Params->size() > 0)
// Ignore explicit specializations; they don't contribute to the template
// depth.
++Count;
for (NamedDecl *Param : *Params) {
if (Param->getDeclName()) {
S->AddDecl(Param);
IdResolver.AddDecl(Param);
}
}
}
return Count;
}
void Sema::ActOnStartDelayedMemberDeclarations(Scope *S, Decl *RecordD) {
if (!RecordD) return;
AdjustDeclIfTemplate(RecordD);
CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordD);
PushDeclContext(S, Record);
}
void Sema::ActOnFinishDelayedMemberDeclarations(Scope *S, Decl *RecordD) {
if (!RecordD) return;
PopDeclContext();
}
/// This is used to implement the constant expression evaluation part of the
/// attribute enable_if extension. There is nothing in standard C++ which would
/// require reentering parameters.
void Sema::ActOnReenterCXXMethodParameter(Scope *S, ParmVarDecl *Param) {
if (!Param)
return;
S->AddDecl(Param);
if (Param->getDeclName())
IdResolver.AddDecl(Param);
}
/// ActOnStartDelayedCXXMethodDeclaration - We have completed
/// parsing a top-level (non-nested) C++ class, and we are now
/// parsing those parts of the given Method declaration that could
/// not be parsed earlier (C++ [class.mem]p2), such as default
/// arguments. This action should enter the scope of the given
/// Method declaration as if we had just parsed the qualified method
/// name. However, it should not bring the parameters into scope;
/// that will be performed by ActOnDelayedCXXMethodParameter.
void Sema::ActOnStartDelayedCXXMethodDeclaration(Scope *S, Decl *MethodD) {
}
/// ActOnDelayedCXXMethodParameter - We've already started a delayed
/// C++ method declaration. We're (re-)introducing the given
/// function parameter into scope for use in parsing later parts of
/// the method declaration. For example, we could see an
/// ActOnParamDefaultArgument event for this parameter.
void Sema::ActOnDelayedCXXMethodParameter(Scope *S, Decl *ParamD) {
if (!ParamD)
return;
ParmVarDecl *Param = cast<ParmVarDecl>(ParamD);
// If this parameter has an unparsed default argument, clear it out
// to make way for the parsed default argument.
if (Param->hasUnparsedDefaultArg())
Param->setDefaultArg(nullptr);
S->AddDecl(Param);
if (Param->getDeclName())
IdResolver.AddDecl(Param);
}
/// ActOnFinishDelayedCXXMethodDeclaration - We have finished
/// processing the delayed method declaration for Method. The method
/// declaration is now considered finished. There may be a separate
/// ActOnStartOfFunctionDef action later (not necessarily
/// immediately!) for this method, if it was also defined inside the
/// class body.
void Sema::ActOnFinishDelayedCXXMethodDeclaration(Scope *S, Decl *MethodD) {
if (!MethodD)
return;
AdjustDeclIfTemplate(MethodD);
FunctionDecl *Method = cast<FunctionDecl>(MethodD);
// Now that we have our default arguments, check the constructor
// again. It could produce additional diagnostics or affect whether
// the class has implicitly-declared destructors, among other
// things.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Method))
CheckConstructor(Constructor);
// Check the default arguments, which we may have added.
if (!Method->isInvalidDecl())
CheckCXXDefaultArguments(Method);
}
/// CheckConstructorDeclarator - Called by ActOnDeclarator to check
/// the well-formedness of the constructor declarator @p D with type @p
/// R. If there are any errors in the declarator, this routine will
/// emit diagnostics and set the invalid bit to true. In any case, the type
/// will be updated to reflect a well-formed type for the constructor and
/// returned.
QualType Sema::CheckConstructorDeclarator(Declarator &D, QualType R,
StorageClass &SC) {
bool isVirtual = D.getDeclSpec().isVirtualSpecified();
// C++ [class.ctor]p3:
// A constructor shall not be virtual (10.3) or static (9.4). A
// constructor can be invoked for a const, volatile or const
// volatile object. A constructor shall not be declared const,
// volatile, or const volatile (9.3.2).
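// For example (illustrative, not from the original source), each of these is
// rejected below:
// struct C1 { virtual C1(); }; // error: constructor cannot be virtual
// struct C2 { static C2(); }; // error: constructor cannot be static
// struct C3 { C3() const; }; // error: constructor cannot be const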
if (isVirtual) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_constructor_cannot_be)
<< "virtual" << SourceRange(D.getDeclSpec().getVirtualSpecLoc())
<< SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
if (SC == SC_Static) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_constructor_cannot_be)
<< "static" << SourceRange(D.getDeclSpec().getStorageClassSpecLoc())
<< SourceRange(D.getIdentifierLoc());
D.setInvalidType();
SC = SC_None;
}
if (unsigned TypeQuals = D.getDeclSpec().getTypeQualifiers()) {
diagnoseIgnoredQualifiers(
diag::err_constructor_return_type, TypeQuals, SourceLocation(),
D.getDeclSpec().getConstSpecLoc(), D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
D.getDeclSpec().getAtomicSpecLoc());
D.setInvalidType();
}
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
if (FTI.TypeQuals != 0) {
if (FTI.TypeQuals & Qualifiers::Const)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_constructor)
<< "const" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Volatile)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_constructor)
<< "volatile" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Restrict)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_constructor)
<< "restrict" << SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
// C++0x [class.ctor]p4:
// A constructor shall not be declared with a ref-qualifier.
if (FTI.hasRefQualifier()) {
Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_constructor)
<< FTI.RefQualifierIsLValueRef
<< FixItHint::CreateRemoval(FTI.getRefQualifierLoc());
D.setInvalidType();
}
// Rebuild the function type "R" without any type qualifiers (in
// case any of the errors above fired) and with "void" as the
// return type, since constructors don't have return types.
const FunctionProtoType *Proto = R->getAs<FunctionProtoType>();
if (Proto->getReturnType() == Context.VoidTy && !D.isInvalidType())
return R;
FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
EPI.TypeQuals = 0;
EPI.RefQualifier = RQ_None;
return Context.getFunctionType(Context.VoidTy, Proto->getParamTypes(), EPI);
}
/// CheckConstructor - Checks a fully-formed constructor for
/// well-formedness, issuing any diagnostics required. Marks the
/// constructor invalid if a problem is found.
void Sema::CheckConstructor(CXXConstructorDecl *Constructor) {
CXXRecordDecl *ClassDecl
= dyn_cast<CXXRecordDecl>(Constructor->getDeclContext());
if (!ClassDecl)
return Constructor->setInvalidDecl();
// C++ [class.copy]p3:
// A declaration of a constructor for a class X is ill-formed if
// its first parameter is of type (optionally cv-qualified) X and
// either there are no other parameters or else all other
// parameters have default arguments.
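// e.g. (illustrative, not from the original source):
// struct X { X(X); }; // error: must pass its first argument by reference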
if (!Constructor->isInvalidDecl() &&
((Constructor->getNumParams() == 1) ||
(Constructor->getNumParams() > 1 &&
Constructor->getParamDecl(1)->hasDefaultArg())) &&
Constructor->getTemplateSpecializationKind()
!= TSK_ImplicitInstantiation) {
QualType ParamType = Constructor->getParamDecl(0)->getType();
QualType ClassTy = Context.getTagDeclType(ClassDecl);
if (Context.getCanonicalType(ParamType).getUnqualifiedType() == ClassTy) {
SourceLocation ParamLoc = Constructor->getParamDecl(0)->getLocation();
const char *ConstRef
= Constructor->getParamDecl(0)->getIdentifier() ? "const &"
: " const &";
Diag(ParamLoc, diag::err_constructor_byvalue_arg)
<< FixItHint::CreateInsertion(ParamLoc, ConstRef);
// FIXME: Rather than making the constructor invalid, we should endeavor
// to fix the type.
Constructor->setInvalidDecl();
}
}
}
/// CheckDestructor - Checks a fully-formed destructor definition for
/// well-formedness, issuing any diagnostics required. Returns true
/// on error.
bool Sema::CheckDestructor(CXXDestructorDecl *Destructor) {
CXXRecordDecl *RD = Destructor->getParent();
if (!Destructor->getOperatorDelete() && Destructor->isVirtual()) {
SourceLocation Loc;
if (!Destructor->isImplicit())
Loc = Destructor->getLocation();
else
Loc = RD->getLocation();
// If we have a virtual destructor, look up the deallocation function
if (FunctionDecl *OperatorDelete =
FindDeallocationFunctionForDestructor(Loc, RD)) {
MarkFunctionReferenced(Loc, OperatorDelete);
Destructor->setOperatorDelete(OperatorDelete);
}
}
return false;
}
/// CheckDestructorDeclarator - Called by ActOnDeclarator to check
/// the well-formedness of the destructor declarator @p D with type @p
/// R. If there are any errors in the declarator, this routine will
/// emit diagnostics and set the declarator to invalid. Even if this happens,
/// the type will be updated to reflect a well-formed type for the destructor
/// and returned.
QualType Sema::CheckDestructorDeclarator(Declarator &D, QualType R,
StorageClass& SC) {
// C++ [class.dtor]p1:
// [...] A typedef-name that names a class is a class-name
// (7.1.3); however, a typedef-name that names a class shall not
// be used as the identifier in the declarator for a destructor
// declaration.
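// For example (illustrative, not from the original source):
// struct S { ~S(); };
// typedef S Alias;
// Alias::~Alias() { } // error: typedef-name used as destructor name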
QualType DeclaratorType = GetTypeFromParser(D.getName().DestructorName);
if (const TypedefType *TT = DeclaratorType->getAs<TypedefType>())
Diag(D.getIdentifierLoc(), diag::err_destructor_typedef_name)
<< DeclaratorType << isa<TypeAliasDecl>(TT->getDecl());
else if (const TemplateSpecializationType *TST =
DeclaratorType->getAs<TemplateSpecializationType>())
if (TST->isTypeAlias())
Diag(D.getIdentifierLoc(), diag::err_destructor_typedef_name)
<< DeclaratorType << 1;
// C++ [class.dtor]p2:
// A destructor is used to destroy objects of its class type. A
// destructor takes no parameters, and no return type can be
// specified for it (not even void). The address of a destructor
// shall not be taken. A destructor shall not be static. A
// destructor can be invoked for a const, volatile or const
// volatile object. A destructor shall not be declared const,
// volatile or const volatile (9.3.2).
if (SC == SC_Static) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_destructor_cannot_be)
<< "static" << SourceRange(D.getDeclSpec().getStorageClassSpecLoc())
<< SourceRange(D.getIdentifierLoc())
<< FixItHint::CreateRemoval(D.getDeclSpec().getStorageClassSpecLoc());
SC = SC_None;
}
if (!D.isInvalidType()) {
// Destructors don't have return types, but the parser will
// happily parse something like:
//
// class X {
// float ~X();
// };
//
// The return type will be eliminated later.
if (D.getDeclSpec().hasTypeSpecifier())
Diag(D.getIdentifierLoc(), diag::err_destructor_return_type)
<< SourceRange(D.getDeclSpec().getTypeSpecTypeLoc())
<< SourceRange(D.getIdentifierLoc());
else if (unsigned TypeQuals = D.getDeclSpec().getTypeQualifiers()) {
diagnoseIgnoredQualifiers(diag::err_destructor_return_type, TypeQuals,
SourceLocation(),
D.getDeclSpec().getConstSpecLoc(),
D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
D.getDeclSpec().getAtomicSpecLoc());
D.setInvalidType();
}
}
DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
if (FTI.TypeQuals != 0 && !D.isInvalidType()) {
if (FTI.TypeQuals & Qualifiers::Const)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_destructor)
<< "const" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Volatile)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_destructor)
<< "volatile" << SourceRange(D.getIdentifierLoc());
if (FTI.TypeQuals & Qualifiers::Restrict)
Diag(D.getIdentifierLoc(), diag::err_invalid_qualified_destructor)
<< "restrict" << SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
// C++0x [class.dtor]p2:
// A destructor shall not be declared with a ref-qualifier.
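// For illustration: 'struct X { ~X() &&; };' is rejected here, with a
// fix-it removing the ref-qualifier.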
if (FTI.hasRefQualifier()) {
Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_destructor)
<< FTI.RefQualifierIsLValueRef
<< FixItHint::CreateRemoval(FTI.getRefQualifierLoc());
D.setInvalidType();
}
// Make sure we don't have any parameters.
if (FTIHasNonVoidParameters(FTI)) {
Diag(D.getIdentifierLoc(), diag::err_destructor_with_params);
// Delete the parameters.
FTI.freeParams();
D.setInvalidType();
}
// Make sure the destructor isn't variadic.
if (FTI.isVariadic) {
Diag(D.getIdentifierLoc(), diag::err_destructor_variadic);
D.setInvalidType();
}
// Rebuild the function type "R" without any type qualifiers or
// parameters (in case any of the errors above fired) and with
// "void" as the return type, since destructors don't have return
// types.
if (!D.isInvalidType())
return R;
const FunctionProtoType *Proto = R->getAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
EPI.Variadic = false;
EPI.TypeQuals = 0;
EPI.RefQualifier = RQ_None;
return Context.getFunctionType(Context.VoidTy, None, EPI);
}
static void extendLeft(SourceRange &R, SourceRange Before) {
if (Before.isInvalid())
return;
R.setBegin(Before.getBegin());
if (R.getEnd().isInvalid())
R.setEnd(Before.getEnd());
}
static void extendRight(SourceRange &R, SourceRange After) {
if (After.isInvalid())
return;
if (R.getBegin().isInvalid())
R.setBegin(After.getBegin());
R.setEnd(After.getEnd());
}
/// CheckConversionDeclarator - Called by ActOnDeclarator to check the
/// well-formedness of the conversion function declarator @p D with
/// type @p R. If there are any errors in the declarator, this routine
/// will emit diagnostics and mark the declarator invalid. Either way,
/// the type @p R will be updated to reflect a well-formed type for the
/// conversion operator.
void Sema::CheckConversionDeclarator(Declarator &D, QualType &R,
StorageClass& SC) {
// C++ [class.conv.fct]p1:
// Neither parameter types nor return type can be specified. The
// type of a conversion function (8.3.5) is "function taking no
// parameter returning conversion-type-id."
if (SC == SC_Static) {
if (!D.isInvalidType())
Diag(D.getIdentifierLoc(), diag::err_conv_function_not_member)
<< SourceRange(D.getDeclSpec().getStorageClassSpecLoc())
<< D.getName().getSourceRange();
D.setInvalidType();
SC = SC_None;
}
TypeSourceInfo *ConvTSI = nullptr;
QualType ConvType =
GetTypeFromParser(D.getName().ConversionFunctionId, &ConvTSI);
if (D.getDeclSpec().hasTypeSpecifier() && !D.isInvalidType()) {
// Conversion functions don't have return types, but the parser will
// happily parse something like:
//
// class X {
// float operator bool();
// };
//
// The return type will be changed later anyway.
Diag(D.getIdentifierLoc(), diag::err_conv_function_return_type)
<< SourceRange(D.getDeclSpec().getTypeSpecTypeLoc())
<< SourceRange(D.getIdentifierLoc());
D.setInvalidType();
}
const FunctionProtoType *Proto = R->getAs<FunctionProtoType>();
// Make sure we don't have any parameters.
if (Proto->getNumParams() > 0) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_with_params);
// Delete the parameters.
D.getFunctionTypeInfo().freeParams();
D.setInvalidType();
} else if (Proto->isVariadic()) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_variadic);
D.setInvalidType();
}
// Diagnose "&operator bool()" and other such nonsense. This
// is actually a gcc extension which we don't support.
if (Proto->getReturnType() != ConvType) {
bool NeedsTypedef = false;
SourceRange Before, After;
// Walk the chunks and extract information on them for our diagnostic.
bool PastFunctionChunk = false;
for (auto &Chunk : D.type_objects()) {
switch (Chunk.Kind) {
case DeclaratorChunk::Function:
if (!PastFunctionChunk) {
if (Chunk.Fun.HasTrailingReturnType) {
TypeSourceInfo *TRT = nullptr;
GetTypeFromParser(Chunk.Fun.getTrailingReturnType(), &TRT);
if (TRT) extendRight(After, TRT->getTypeLoc().getSourceRange());
}
PastFunctionChunk = true;
break;
}
// Fall through.
case DeclaratorChunk::Array:
NeedsTypedef = true;
extendRight(After, Chunk.getSourceRange());
break;
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
extendLeft(Before, Chunk.getSourceRange());
break;
case DeclaratorChunk::Paren:
extendLeft(Before, Chunk.Loc);
extendRight(After, Chunk.EndLoc);
break;
}
}
SourceLocation Loc = Before.isValid() ? Before.getBegin() :
After.isValid() ? After.getBegin() :
D.getIdentifierLoc();
auto &&DB = Diag(Loc, diag::err_conv_function_with_complex_decl);
DB << Before << After;
if (!NeedsTypedef) {
DB << /*don't need a typedef*/0;
// If we can provide a correct fix-it hint, do so.
if (After.isInvalid() && ConvTSI) {
SourceLocation InsertLoc =
getLocForEndOfToken(ConvTSI->getTypeLoc().getLocEnd());
DB << FixItHint::CreateInsertion(InsertLoc, " ")
<< FixItHint::CreateInsertionFromRange(
InsertLoc, CharSourceRange::getTokenRange(Before))
<< FixItHint::CreateRemoval(Before);
}
} else if (!Proto->getReturnType()->isDependentType()) {
DB << /*typedef*/1 << Proto->getReturnType();
} else if (getLangOpts().CPlusPlus11) {
DB << /*alias template*/2 << Proto->getReturnType();
} else {
DB << /*might not be fixable*/3;
}
// Recover by incorporating the other type chunks into the result type.
// Note, this does *not* change the name of the function. This is compatible
// with the GCC extension:
// struct S { &operator int(); } s;
// int &r = s.operator int(); // ok in GCC
// S::operator int&() {} // error in GCC, function name is 'operator int'.
ConvType = Proto->getReturnType();
}
// C++ [class.conv.fct]p4:
// The conversion-type-id shall not represent a function type nor
// an array type.
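// For illustration:
//   typedef int Arr[4];
//   typedef void Fn();
//   struct X {
//     operator Arr();  // error: conversion function cannot convert to an array type
//     operator Fn();   // error: conversion function cannot convert to a function type
//   };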
if (ConvType->isArrayType()) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_to_array);
ConvType = Context.getPointerType(ConvType);
D.setInvalidType();
} else if (ConvType->isFunctionType()) {
Diag(D.getIdentifierLoc(), diag::err_conv_function_to_function);
ConvType = Context.getPointerType(ConvType);
D.setInvalidType();
}
// Rebuild the function type "R" without any parameters (in case any
// of the errors above fired) and with the conversion type as the
// return type.
if (D.isInvalidType())
R = Context.getFunctionType(ConvType, None, Proto->getExtProtoInfo());
// C++0x explicit conversion operators.
if (D.getDeclSpec().isExplicitSpecified())
Diag(D.getDeclSpec().getExplicitSpecLoc(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_explicit_conversion_functions :
diag::ext_explicit_conversion_functions)
<< SourceRange(D.getDeclSpec().getExplicitSpecLoc());
}
/// ActOnConversionDeclarator - Called by ActOnDeclarator to complete
/// the declaration of the given C++ conversion function. This routine
/// is responsible for recording the conversion function in the C++
/// class, if possible.
Decl *Sema::ActOnConversionDeclarator(CXXConversionDecl *Conversion) {
assert(Conversion && "Expected to receive a conversion function declaration");
CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(Conversion->getDeclContext());
// Make sure we aren't redeclaring the conversion function.
QualType ConvType = Context.getCanonicalType(Conversion->getConversionType());
// C++ [class.conv.fct]p1:
// [...] A conversion function is never used to convert a
// (possibly cv-qualified) object to the (possibly cv-qualified)
// same object type (or a reference to it), to a (possibly
// cv-qualified) base class of that type (or a reference to it),
// or to (possibly cv-qualified) void.
// FIXME: Suppress this warning if the conversion function ends up being a
// virtual function that overrides a virtual function in a base class.
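// For illustration, conversions that draw these warnings:
//   struct B { };
//   struct D : B {
//     operator D();     // converts to its own type: never used
//     operator B&();    // converts to a base class: never used
//     operator void();  // converts to void: never used
//   };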
QualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
if (const ReferenceType *ConvTypeRef = ConvType->getAs<ReferenceType>())
ConvType = ConvTypeRef->getPointeeType();
if (Conversion->getTemplateSpecializationKind() != TSK_Undeclared &&
Conversion->getTemplateSpecializationKind() != TSK_ExplicitSpecialization)
/* Suppress diagnostics for instantiations. */;
else if (ConvType->isRecordType()) {
ConvType = Context.getCanonicalType(ConvType).getUnqualifiedType();
if (ConvType == ClassType)
Diag(Conversion->getLocation(), diag::warn_conv_to_self_not_used)
<< ClassType;
else if (IsDerivedFrom(Conversion->getLocation(), ClassType, ConvType))
Diag(Conversion->getLocation(), diag::warn_conv_to_base_not_used)
<< ClassType << ConvType;
} else if (ConvType->isVoidType()) {
Diag(Conversion->getLocation(), diag::warn_conv_to_void_not_used)
<< ClassType << ConvType;
}
if (FunctionTemplateDecl *ConversionTemplate
= Conversion->getDescribedFunctionTemplate())
return ConversionTemplate;
return Conversion;
}
namespace {
/// Utility class to accumulate and print a diagnostic listing the invalid
/// specifier(s) on a declaration.
struct BadSpecifierDiagnoser {
BadSpecifierDiagnoser(Sema &S, SourceLocation Loc, unsigned DiagID)
: S(S), Diagnostic(S.Diag(Loc, DiagID)) {}
~BadSpecifierDiagnoser() {
Diagnostic << Specifiers;
}
template<typename T> void check(SourceLocation SpecLoc, T Spec) {
return check(SpecLoc, DeclSpec::getSpecifierName(Spec));
}
void check(SourceLocation SpecLoc, DeclSpec::TST Spec) {
return check(SpecLoc,
DeclSpec::getSpecifierName(Spec, S.getPrintingPolicy()));
}
void check(SourceLocation SpecLoc, const char *Spec) {
if (SpecLoc.isInvalid()) return;
Diagnostic << SourceRange(SpecLoc, SpecLoc);
if (!Specifiers.empty()) Specifiers += " ";
Specifiers += Spec;
}
Sema &S;
Sema::SemaDiagnosticBuilder Diagnostic;
std::string Specifiers;
};
}
/// Check the validity of a declarator that we parsed for a deduction-guide.
/// These aren't actually declarators in the grammar, so we need to check that
/// the user didn't specify any pieces that are not part of the deduction-guide
/// grammar.
void Sema::CheckDeductionGuideDeclarator(Declarator &D, QualType &R,
StorageClass &SC) {
TemplateName GuidedTemplate = D.getName().TemplateName.get().get();
TemplateDecl *GuidedTemplateDecl = GuidedTemplate.getAsTemplateDecl();
assert(GuidedTemplateDecl && "missing template decl for deduction guide");
// C++ [temp.deduct.guide]p3:
// A deduction-guide shall be declared in the same scope as the
// corresponding class template.
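// For illustration:
//   template<typename T> struct S { S(T); };
//   S(int) -> S<int>;                    // OK: same scope as S
//   namespace N { S(char) -> S<char>; }  // diagnosed: wrong scope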
if (!CurContext->getRedeclContext()->Equals(
GuidedTemplateDecl->getDeclContext()->getRedeclContext())) {
Diag(D.getIdentifierLoc(), diag::err_deduction_guide_wrong_scope)
<< GuidedTemplateDecl;
Diag(GuidedTemplateDecl->getLocation(), diag::note_template_decl_here);
}
auto &DS = D.getMutableDeclSpec();
// We leave 'friend' and 'virtual' to be rejected in the normal way.
if (DS.hasTypeSpecifier() || DS.getTypeQualifiers() ||
DS.getStorageClassSpecLoc().isValid() || DS.isInlineSpecified() ||
DS.isNoreturnSpecified() || DS.isConstexprSpecified() ||
DS.isConceptSpecified()) {
BadSpecifierDiagnoser Diagnoser(
*this, D.getIdentifierLoc(),
diag::err_deduction_guide_invalid_specifier);
Diagnoser.check(DS.getStorageClassSpecLoc(), DS.getStorageClassSpec());
DS.ClearStorageClassSpecs();
SC = SC_None;
// 'explicit' is permitted.
Diagnoser.check(DS.getInlineSpecLoc(), "inline");
Diagnoser.check(DS.getNoreturnSpecLoc(), "_Noreturn");
Diagnoser.check(DS.getConstexprSpecLoc(), "constexpr");
Diagnoser.check(DS.getConceptSpecLoc(), "concept");
DS.ClearConstexprSpec();
DS.ClearConceptSpec();
Diagnoser.check(DS.getConstSpecLoc(), "const");
Diagnoser.check(DS.getRestrictSpecLoc(), "__restrict");
Diagnoser.check(DS.getVolatileSpecLoc(), "volatile");
Diagnoser.check(DS.getAtomicSpecLoc(), "_Atomic");
Diagnoser.check(DS.getUnalignedSpecLoc(), "__unaligned");
DS.ClearTypeQualifiers();
Diagnoser.check(DS.getTypeSpecComplexLoc(), DS.getTypeSpecComplex());
Diagnoser.check(DS.getTypeSpecSignLoc(), DS.getTypeSpecSign());
Diagnoser.check(DS.getTypeSpecWidthLoc(), DS.getTypeSpecWidth());
Diagnoser.check(DS.getTypeSpecTypeLoc(), DS.getTypeSpecType());
DS.ClearTypeSpecType();
}
if (D.isInvalidType())
return;
// Check the declarator is simple enough.
bool FoundFunction = false;
for (const DeclaratorChunk &Chunk : llvm::reverse(D.type_objects())) {
if (Chunk.Kind == DeclaratorChunk::Paren)
continue;
if (Chunk.Kind != DeclaratorChunk::Function || FoundFunction) {
Diag(D.getDeclSpec().getLocStart(),
diag::err_deduction_guide_with_complex_decl)
<< D.getSourceRange();
break;
}
if (!Chunk.Fun.hasTrailingReturnType()) {
Diag(D.getName().getLocStart(),
diag::err_deduction_guide_no_trailing_return_type);
break;
}
// Check that the return type is written as a specialization of
// the template specified as the deduction-guide's name.
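// For illustration: given 'template<typename T> struct S;', the guide
// 'S(int) -> S<int>;' is fine, while 'S(int) -> int;' is diagnosed below.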
ParsedType TrailingReturnType = Chunk.Fun.getTrailingReturnType();
TypeSourceInfo *TSI = nullptr;
QualType RetTy = GetTypeFromParser(TrailingReturnType, &TSI);
assert(TSI && "deduction guide has valid type but invalid return type?");
bool AcceptableReturnType = false;
bool MightInstantiateToSpecialization = false;
if (auto RetTST =
TSI->getTypeLoc().getAs<TemplateSpecializationTypeLoc>()) {
TemplateName SpecifiedName = RetTST.getTypePtr()->getTemplateName();
bool TemplateMatches =
Context.hasSameTemplateName(SpecifiedName, GuidedTemplate);
if (SpecifiedName.getKind() == TemplateName::Template && TemplateMatches)
AcceptableReturnType = true;
else {
// This could still instantiate to the right type, unless we know it
// names the wrong class template.
auto *TD = SpecifiedName.getAsTemplateDecl();
MightInstantiateToSpecialization = !(TD && isa<ClassTemplateDecl>(TD) &&
!TemplateMatches);
}
} else if (!RetTy.hasQualifiers() && RetTy->isDependentType()) {
MightInstantiateToSpecialization = true;
}
if (!AcceptableReturnType) {
Diag(TSI->getTypeLoc().getLocStart(),
diag::err_deduction_guide_bad_trailing_return_type)
<< GuidedTemplate << TSI->getType() << MightInstantiateToSpecialization
<< TSI->getTypeLoc().getSourceRange();
}
// Keep going to check that we don't have any inner declarator pieces (we
// could still have a function returning a pointer to a function).
FoundFunction = true;
}
if (D.isFunctionDefinition())
Diag(D.getIdentifierLoc(), diag::err_deduction_guide_defines_function);
}
//===----------------------------------------------------------------------===//
// Namespace Handling
//===----------------------------------------------------------------------===//
/// \brief Diagnose a mismatch in 'inline' qualifiers when a namespace is
/// reopened.
static void DiagnoseNamespaceInlineMismatch(Sema &S, SourceLocation KeywordLoc,
SourceLocation Loc,
IdentifierInfo *II, bool *IsInline,
NamespaceDecl *PrevNS) {
assert(*IsInline != PrevNS->isInline());
// HACK: Work around a bug in libstdc++4.6's <atomic>, where
// std::__atomic[0,1,2] are defined as non-inline namespaces, then reopened as
// inline namespaces, with the intention of bringing names into namespace std.
//
// We support this just well enough to get that case working; this is not
// sufficient to support reopening namespaces as inline in general.
if (*IsInline && II && II->getName().startswith("__atomic") &&
S.getSourceManager().isInSystemHeader(Loc)) {
// Mark all prior declarations of the namespace as inline.
for (NamespaceDecl *NS = PrevNS->getMostRecentDecl(); NS;
NS = NS->getPreviousDecl())
NS->setInline(*IsInline);
// Patch up the lookup table for the containing namespace. This isn't really
// correct, but it's good enough for this particular case.
for (auto *I : PrevNS->decls())
if (auto *ND = dyn_cast<NamedDecl>(I))
PrevNS->getParent()->makeDeclVisibleInContext(ND);
return;
}
if (PrevNS->isInline())
// The user probably just forgot the 'inline', so suggest that it
// be added back.
S.Diag(Loc, diag::warn_inline_namespace_reopened_noninline)
<< FixItHint::CreateInsertion(KeywordLoc, "inline ");
else
S.Diag(Loc, diag::err_inline_namespace_mismatch);
S.Diag(PrevNS->getLocation(), diag::note_previous_definition);
*IsInline = PrevNS->isInline();
}
/// ActOnStartNamespaceDef - This is called at the start of a namespace
/// definition.
Decl *Sema::ActOnStartNamespaceDef(Scope *NamespcScope,
SourceLocation InlineLoc,
SourceLocation NamespaceLoc,
SourceLocation IdentLoc,
IdentifierInfo *II,
SourceLocation LBrace,
AttributeList *AttrList,
UsingDirectiveDecl *&UD) {
SourceLocation StartLoc = InlineLoc.isValid() ? InlineLoc : NamespaceLoc;
// For an anonymous namespace, take the location of the left brace.
SourceLocation Loc = II ? IdentLoc : LBrace;
bool IsInline = InlineLoc.isValid();
bool IsInvalid = false;
bool IsStd = false;
bool AddToKnown = false;
Scope *DeclRegionScope = NamespcScope->getParent();
NamespaceDecl *PrevNS = nullptr;
if (II) {
// C++ [namespace.def]p2:
// The identifier in an original-namespace-definition shall not
// have been previously defined in the declarative region in
// which the original-namespace-definition appears. The
// identifier in an original-namespace-definition is the name of
// the namespace. Subsequently in that declarative region, it is
// treated as an original-namespace-name.
//
// Since namespace names are unique in their scope, and we don't
// look through using directives, just look for any ordinary names
// as if by qualified name lookup.
LookupResult R(*this, II, IdentLoc, LookupOrdinaryName, ForRedeclaration);
LookupQualifiedName(R, CurContext->getRedeclContext());
NamedDecl *PrevDecl =
R.isSingleResult() ? R.getRepresentativeDecl() : nullptr;
PrevNS = dyn_cast_or_null<NamespaceDecl>(PrevDecl);
if (PrevNS) {
// This is an extended namespace definition.
if (IsInline != PrevNS->isInline())
DiagnoseNamespaceInlineMismatch(*this, NamespaceLoc, Loc, II,
&IsInline, PrevNS);
} else if (PrevDecl) {
// This is an invalid name redefinition.
Diag(Loc, diag::err_redefinition_different_kind)
<< II;
Diag(PrevDecl->getLocation(), diag::note_previous_definition);
IsInvalid = true;
// Continue on to push Namespc as current DeclContext and return it.
} else if (II->isStr("std") &&
CurContext->getRedeclContext()->isTranslationUnit()) {
// This is the first "real" definition of the namespace "std", so update
// our cache of the "std" namespace to point at this definition.
PrevNS = getStdNamespace();
IsStd = true;
AddToKnown = !IsInline;
} else {
// We've seen this namespace for the first time.
AddToKnown = !IsInline;
}
} else {
// Anonymous namespaces.
// Determine whether the parent already has an anonymous namespace.
DeclContext *Parent = CurContext->getRedeclContext();
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(Parent)) {
PrevNS = TU->getAnonymousNamespace();
} else {
NamespaceDecl *ND = cast<NamespaceDecl>(Parent);
PrevNS = ND->getAnonymousNamespace();
}
if (PrevNS && IsInline != PrevNS->isInline())
DiagnoseNamespaceInlineMismatch(*this, NamespaceLoc, NamespaceLoc, II,
&IsInline, PrevNS);
}
NamespaceDecl *Namespc = NamespaceDecl::Create(Context, CurContext, IsInline,
StartLoc, Loc, II, PrevNS);
if (IsInvalid)
Namespc->setInvalidDecl();
ProcessDeclAttributeList(DeclRegionScope, Namespc, AttrList);
AddPragmaAttributes(DeclRegionScope, Namespc);
// FIXME: Should we be merging attributes?
if (const VisibilityAttr *Attr = Namespc->getAttr<VisibilityAttr>())
PushNamespaceVisibilityAttr(Attr, Loc);
if (IsStd)
StdNamespace = Namespc;
if (AddToKnown)
KnownNamespaces[Namespc] = false;
if (II) {
PushOnScopeChains(Namespc, DeclRegionScope);
} else {
// Link the anonymous namespace into its parent.
DeclContext *Parent = CurContext->getRedeclContext();
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(Parent)) {
TU->setAnonymousNamespace(Namespc);
} else {
cast<NamespaceDecl>(Parent)->setAnonymousNamespace(Namespc);
}
CurContext->addDecl(Namespc);
// C++ [namespace.unnamed]p1. An unnamed-namespace-definition
// behaves as if it were replaced by
// namespace unique { /* empty body */ }
// using namespace unique;
// namespace unique { namespace-body }
// where all occurrences of 'unique' in a translation unit are
// replaced by the same identifier and this identifier differs
// from all other identifiers in the entire program.
// We just create the namespace with an empty name and then add an
// implicit using declaration, just like the standard suggests.
//
// CodeGen enforces the "universally unique" aspect by giving all
// declarations semantically contained within an anonymous
// namespace internal linkage.
if (!PrevNS) {
UD = UsingDirectiveDecl::Create(Context, Parent,
/* 'using' */ LBrace,
/* 'namespace' */ SourceLocation(),
/* qualifier */ NestedNameSpecifierLoc(),
/* identifier */ SourceLocation(),
Namespc,
/* Ancestor */ Parent);
UD->setImplicit();
Parent->addDecl(UD);
}
}
ActOnDocumentableDecl(Namespc);
// Although we could have an invalid decl (i.e. the namespace name is a
// redefinition), push it as current DeclContext and try to continue parsing.
// FIXME: We should be able to push Namespc here, so that each DeclContext
// for the namespace has the declarations that showed up in that particular
// namespace definition.
PushDeclContext(NamespcScope, Namespc);
return Namespc;
}
/// getNamespaceDecl - Returns the namespace a decl represents. If the decl
/// is a namespace alias, returns the namespace it points to.
static inline NamespaceDecl *getNamespaceDecl(NamedDecl *D) {
if (NamespaceAliasDecl *AD = dyn_cast_or_null<NamespaceAliasDecl>(D))
return AD->getNamespace();
return dyn_cast_or_null<NamespaceDecl>(D);
}
/// ActOnFinishNamespaceDef - This callback is called after a namespace is
/// exited. Decl is the DeclTy returned by ActOnStartNamespaceDef.
void Sema::ActOnFinishNamespaceDef(Decl *Dcl, SourceLocation RBrace) {
NamespaceDecl *Namespc = dyn_cast_or_null<NamespaceDecl>(Dcl);
assert(Namespc && "Invalid parameter, expected NamespaceDecl");
Namespc->setRBraceLoc(RBrace);
PopDeclContext();
if (Namespc->hasAttr<VisibilityAttr>())
PopPragmaVisibility(true, RBrace);
}
CXXRecordDecl *Sema::getStdBadAlloc() const {
return cast_or_null<CXXRecordDecl>(
StdBadAlloc.get(Context.getExternalSource()));
}
EnumDecl *Sema::getStdAlignValT() const {
return cast_or_null<EnumDecl>(StdAlignValT.get(Context.getExternalSource()));
}
NamespaceDecl *Sema::getStdNamespace() const {
return cast_or_null<NamespaceDecl>(
StdNamespace.get(Context.getExternalSource()));
}
NamespaceDecl *Sema::lookupStdExperimentalNamespace() {
if (!StdExperimentalNamespaceCache) {
if (auto Std = getStdNamespace()) {
LookupResult Result(*this, &PP.getIdentifierTable().get("experimental"),
SourceLocation(), LookupNamespaceName);
if (!LookupQualifiedName(Result, Std) ||
!(StdExperimentalNamespaceCache =
Result.getAsSingle<NamespaceDecl>()))
Result.suppressDiagnostics();
}
}
return StdExperimentalNamespaceCache;
}
/// \brief Retrieve the special "std" namespace, which may require us to
/// implicitly define the namespace.
NamespaceDecl *Sema::getOrCreateStdNamespace() {
if (!StdNamespace) {
// The "std" namespace has not yet been defined, so build one implicitly.
StdNamespace = NamespaceDecl::Create(Context,
Context.getTranslationUnitDecl(),
/*Inline=*/false,
SourceLocation(), SourceLocation(),
&PP.getIdentifierTable().get("std"),
/*PrevDecl=*/nullptr);
getStdNamespace()->setImplicit(true);
}
return getStdNamespace();
}
bool Sema::isStdInitializerList(QualType Ty, QualType *Element) {
assert(getLangOpts().CPlusPlus &&
"Looking for std::initializer_list outside of C++.");
// We're looking for implicit instantiations of
// template <typename E> class std::initializer_list.
if (!StdNamespace) // If we haven't seen namespace std yet, this can't be it.
return false;
ClassTemplateDecl *Template = nullptr;
const TemplateArgument *Arguments = nullptr;
if (const RecordType *RT = Ty->getAs<RecordType>()) {
ClassTemplateSpecializationDecl *Specialization =
dyn_cast<ClassTemplateSpecializationDecl>(RT->getDecl());
if (!Specialization)
return false;
Template = Specialization->getSpecializedTemplate();
Arguments = Specialization->getTemplateArgs().data();
} else if (const TemplateSpecializationType *TST =
Ty->getAs<TemplateSpecializationType>()) {
Template = dyn_cast_or_null<ClassTemplateDecl>(
TST->getTemplateName().getAsTemplateDecl());
Arguments = TST->getArgs();
}
if (!Template)
return false;
if (!StdInitializerList) {
// Haven't recognized std::initializer_list yet, maybe this is it.
CXXRecordDecl *TemplateClass = Template->getTemplatedDecl();
if (TemplateClass->getIdentifier() !=
&PP.getIdentifierTable().get("initializer_list") ||
!getStdNamespace()->InEnclosingNamespaceSetOf(
TemplateClass->getDeclContext()))
return false;
// This is a template called std::initializer_list, but is it the right
// template?
TemplateParameterList *Params = Template->getTemplateParameters();
if (Params->getMinRequiredArguments() != 1)
return false;
if (!isa<TemplateTypeParmDecl>(Params->getParam(0)))
return false;
// It's the right template.
StdInitializerList = Template;
}
if (Template->getCanonicalDecl() != StdInitializerList->getCanonicalDecl())
return false;
// This is an instance of std::initializer_list. Find the argument type.
if (Element)
*Element = Arguments[0].getAsType();
return true;
}
static ClassTemplateDecl *LookupStdInitializerList(Sema &S, SourceLocation Loc){
NamespaceDecl *Std = S.getStdNamespace();
if (!Std) {
S.Diag(Loc, diag::err_implied_std_initializer_list_not_found);
return nullptr;
}
LookupResult Result(S, &S.PP.getIdentifierTable().get("initializer_list"),
Loc, Sema::LookupOrdinaryName);
if (!S.LookupQualifiedName(Result, Std)) {
S.Diag(Loc, diag::err_implied_std_initializer_list_not_found);
return nullptr;
}
ClassTemplateDecl *Template = Result.getAsSingle<ClassTemplateDecl>();
if (!Template) {
Result.suppressDiagnostics();
// We found something weird. Complain about the first thing we found.
NamedDecl *Found = *Result.begin();
S.Diag(Found->getLocation(), diag::err_malformed_std_initializer_list);
return nullptr;
}
// We found some template called std::initializer_list. Now verify that it's
// correct.
TemplateParameterList *Params = Template->getTemplateParameters();
if (Params->getMinRequiredArguments() != 1 ||
!isa<TemplateTypeParmDecl>(Params->getParam(0))) {
S.Diag(Template->getLocation(), diag::err_malformed_std_initializer_list);
return nullptr;
}
return Template;
}
QualType Sema::BuildStdInitializerList(QualType Element, SourceLocation Loc) {
if (!StdInitializerList) {
StdInitializerList = LookupStdInitializerList(*this, Loc);
if (!StdInitializerList)
return QualType();
}
TemplateArgumentListInfo Args(Loc, Loc);
Args.addArgument(TemplateArgumentLoc(TemplateArgument(Element),
Context.getTrivialTypeSourceInfo(Element,
Loc)));
return Context.getCanonicalType(
CheckTemplateIdType(TemplateName(StdInitializerList), Loc, Args));
}
bool Sema::isInitListConstructor(const FunctionDecl *Ctor) {
// C++ [dcl.init.list]p2:
// A constructor is an initializer-list constructor if its first parameter
// is of type std::initializer_list<E> or reference to possibly cv-qualified
// std::initializer_list<E> for some type E, and either there are no other
// parameters or else all other parameters have default arguments.
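// For illustration:
//   struct X {
//     X(std::initializer_list<int>);           // initializer-list constructor
//     X(std::initializer_list<int>, int = 0);  // also one
//     X(std::initializer_list<int>, int);      // not one: no default argument
//   };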
if (Ctor->getNumParams() < 1 ||
(Ctor->getNumParams() > 1 && !Ctor->getParamDecl(1)->hasDefaultArg()))
return false;
QualType ArgType = Ctor->getParamDecl(0)->getType();
if (const ReferenceType *RT = ArgType->getAs<ReferenceType>())
ArgType = RT->getPointeeType().getUnqualifiedType();
return isStdInitializerList(ArgType, nullptr);
}
/// \brief Determine whether a using-directive is in a context where it will
/// apply in all contexts.
static bool IsUsingDirectiveInToplevelContext(DeclContext *CurContext) {
switch (CurContext->getDeclKind()) {
case Decl::TranslationUnit:
return true;
case Decl::LinkageSpec:
return IsUsingDirectiveInToplevelContext(CurContext->getParent());
default:
return false;
}
}
namespace {
// Callback to only accept typo corrections that are namespaces.
class NamespaceValidatorCCC : public CorrectionCandidateCallback {
public:
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (NamedDecl *ND = candidate.getCorrectionDecl())
return isa<NamespaceDecl>(ND) || isa<NamespaceAliasDecl>(ND);
return false;
}
};
}
static bool TryNamespaceTypoCorrection(Sema &S, LookupResult &R, Scope *Sc,
CXXScopeSpec &SS,
SourceLocation IdentLoc,
IdentifierInfo *Ident) {
R.clear();
if (TypoCorrection Corrected =
S.CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), Sc, &SS,
llvm::make_unique<NamespaceValidatorCCC>(),
Sema::CTK_ErrorRecovery)) {
if (DeclContext *DC = S.computeDeclContext(SS, false)) {
std::string CorrectedStr(Corrected.getAsString(S.getLangOpts()));
bool DroppedSpecifier = Corrected.WillReplaceSpecifier() &&
Ident->getName().equals(CorrectedStr);
S.diagnoseTypo(Corrected,
S.PDiag(diag::err_using_directive_member_suggest)
<< Ident << DC << DroppedSpecifier << SS.getRange(),
S.PDiag(diag::note_namespace_defined_here));
} else {
S.diagnoseTypo(Corrected,
S.PDiag(diag::err_using_directive_suggest) << Ident,
S.PDiag(diag::note_namespace_defined_here));
}
R.addDecl(Corrected.getFoundDecl());
return true;
}
return false;
}
Decl *Sema::ActOnUsingDirective(Scope *S,
SourceLocation UsingLoc,
SourceLocation NamespcLoc,
CXXScopeSpec &SS,
SourceLocation IdentLoc,
IdentifierInfo *NamespcName,
AttributeList *AttrList) {
assert(!SS.isInvalid() && "Invalid CXXScopeSpec.");
assert(NamespcName && "Invalid NamespcName.");
assert(IdentLoc.isValid() && "Invalid NamespcName location.");
// This can only happen along a recovery path.
while (S->isTemplateParamScope())
S = S->getParent();
assert(S->getFlags() & Scope::DeclScope && "Invalid Scope.");
UsingDirectiveDecl *UDir = nullptr;
NestedNameSpecifier *Qualifier = nullptr;
if (SS.isSet())
Qualifier = SS.getScopeRep();
// Lookup namespace name.
LookupResult R(*this, NamespcName, IdentLoc, LookupNamespaceName);
LookupParsedName(R, S, &SS);
if (R.isAmbiguous())
return nullptr;
if (R.empty()) {
R.clear();
// Allow "using namespace std;" or "using namespace ::std;" even if
// "std" hasn't been defined yet, for GCC compatibility.
if ((!Qualifier || Qualifier->getKind() == NestedNameSpecifier::Global) &&
NamespcName->isStr("std")) {
Diag(IdentLoc, diag::ext_using_undefined_std);
R.addDecl(getOrCreateStdNamespace());
R.resolveKind();
}
// Otherwise, attempt typo correction.
else TryNamespaceTypoCorrection(*this, R, S, SS, IdentLoc, NamespcName);
}
if (!R.empty()) {
NamedDecl *Named = R.getRepresentativeDecl();
NamespaceDecl *NS = R.getAsSingle<NamespaceDecl>();
assert(NS && "expected namespace decl");
// The use of a nested name specifier may trigger deprecation warnings.
DiagnoseUseOfDecl(Named, IdentLoc);
// C++ [namespace.udir]p1:
// A using-directive specifies that the names in the nominated
// namespace can be used in the scope in which the
// using-directive appears after the using-directive. During
// unqualified name lookup (3.4.1), the names appear as if they
// were declared in the nearest enclosing namespace which
// contains both the using-directive and the nominated
// namespace. [Note: in this context, "contains" means "contains
// directly or indirectly". ]
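// For illustration:
//   namespace A {
//     namespace B { int i; }
//     using namespace B;
//     int j = i;  // OK: A::B::i appears as if declared in A
//   }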
// Find enclosing context containing both using-directive and
// nominated namespace.
DeclContext *CommonAncestor = cast<DeclContext>(NS);
while (CommonAncestor && !CommonAncestor->Encloses(CurContext))
CommonAncestor = CommonAncestor->getParent();
UDir = UsingDirectiveDecl::Create(Context, CurContext, UsingLoc, NamespcLoc,
SS.getWithLocInContext(Context),
IdentLoc, Named, CommonAncestor);
if (IsUsingDirectiveInToplevelContext(CurContext) &&
!SourceMgr.isInMainFile(SourceMgr.getExpansionLoc(IdentLoc))) {
Diag(IdentLoc, diag::warn_using_directive_in_header);
}
PushUsingDirective(S, UDir);
} else {
Diag(IdentLoc, diag::err_expected_namespace_name) << SS.getRange();
}
if (UDir)
ProcessDeclAttributeList(S, UDir, AttrList);
return UDir;
}
void Sema::PushUsingDirective(Scope *S, UsingDirectiveDecl *UDir) {
// If the scope has an associated entity and the using directive is at
// namespace or translation unit scope, add the UsingDirectiveDecl into
// its lookup structure so qualified name lookup can find it.
DeclContext *Ctx = S->getEntity();
if (Ctx && !Ctx->isFunctionOrMethod())
Ctx->addDecl(UDir);
else
// Otherwise, it is at block scope. The using-directives will affect lookup
// only to the end of the scope.
S->PushUsingDirective(UDir);
}
Decl *Sema::ActOnUsingDeclaration(Scope *S,
AccessSpecifier AS,
SourceLocation UsingLoc,
SourceLocation TypenameLoc,
CXXScopeSpec &SS,
UnqualifiedId &Name,
SourceLocation EllipsisLoc,
AttributeList *AttrList) {
assert(S->getFlags() & Scope::DeclScope && "Invalid Scope.");
if (SS.isEmpty()) {
Diag(Name.getLocStart(), diag::err_using_requires_qualname);
return nullptr;
}
switch (Name.getKind()) {
case UnqualifiedId::IK_ImplicitSelfParam:
case UnqualifiedId::IK_Identifier:
case UnqualifiedId::IK_OperatorFunctionId:
case UnqualifiedId::IK_LiteralOperatorId:
case UnqualifiedId::IK_ConversionFunctionId:
break;
case UnqualifiedId::IK_ConstructorName:
case UnqualifiedId::IK_ConstructorTemplateId:
// C++11 inheriting constructors.
Diag(Name.getLocStart(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_using_decl_constructor :
diag::err_using_decl_constructor)
<< SS.getRange();
if (getLangOpts().CPlusPlus11) break;
return nullptr;
case UnqualifiedId::IK_DestructorName:
Diag(Name.getLocStart(), diag::err_using_decl_destructor)
<< SS.getRange();
return nullptr;
case UnqualifiedId::IK_TemplateId:
Diag(Name.getLocStart(), diag::err_using_decl_template_id)
<< SourceRange(Name.TemplateId->LAngleLoc, Name.TemplateId->RAngleLoc);
return nullptr;
case UnqualifiedId::IK_DeductionGuideName:
llvm_unreachable("cannot parse qualified deduction guide name");
}
DeclarationNameInfo TargetNameInfo = GetNameFromUnqualifiedId(Name);
DeclarationName TargetName = TargetNameInfo.getName();
if (!TargetName)
return nullptr;
// Warn about access declarations.
if (UsingLoc.isInvalid()) {
Diag(Name.getLocStart(),
getLangOpts().CPlusPlus11 ? diag::err_access_decl
: diag::warn_access_decl_deprecated)
<< FixItHint::CreateInsertion(SS.getRange().getBegin(), "using ");
}
if (EllipsisLoc.isInvalid()) {
if (DiagnoseUnexpandedParameterPack(SS, UPPC_UsingDeclaration) ||
DiagnoseUnexpandedParameterPack(TargetNameInfo, UPPC_UsingDeclaration))
return nullptr;
} else {
if (!SS.getScopeRep()->containsUnexpandedParameterPack() &&
!TargetNameInfo.containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< SourceRange(SS.getBeginLoc(), TargetNameInfo.getEndLoc());
EllipsisLoc = SourceLocation();
}
}
NamedDecl *UD =
BuildUsingDeclaration(S, AS, UsingLoc, TypenameLoc.isValid(), TypenameLoc,
SS, TargetNameInfo, EllipsisLoc, AttrList,
/*IsInstantiation*/false);
if (UD)
PushOnScopeChains(UD, S, /*AddToContext*/ false);
return UD;
}
/// \brief Determine whether a using declaration considers the given
/// declarations as "equivalent", e.g., if they are redeclarations of
/// the same entity or are both typedefs of the same type.
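/// For illustration: two declarations 'typedef int I;' appearing in different
/// scopes are considered equivalent here, since both name the same type.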
static bool
IsEquivalentForUsingDecl(ASTContext &Context, NamedDecl *D1, NamedDecl *D2) {
if (D1->getCanonicalDecl() == D2->getCanonicalDecl())
return true;
if (TypedefNameDecl *TD1 = dyn_cast<TypedefNameDecl>(D1))
if (TypedefNameDecl *TD2 = dyn_cast<TypedefNameDecl>(D2))
return Context.hasSameType(TD1->getUnderlyingType(),
TD2->getUnderlyingType());
return false;
}
/// Determines whether to create a using shadow decl for a particular
/// decl, given the set of decls existing prior to this using lookup.
bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig,
const LookupResult &Previous,
UsingShadowDecl *&PrevShadow) {
// Diagnose finding a decl which is not from a base class of the
// current class. We do this now because there are cases where this
// function will silently decide not to build a shadow decl, which
// will pre-empt further diagnostics.
//
// We don't need to do this in C++11 because we do the check once on
// the qualifier.
//
// FIXME: diagnose the following if we care enough:
// struct A { int foo; };
// struct B : A { using A::foo; };
// template <class T> struct C : A {};
// template <class T> struct D : C<T> { using B::foo; } // <---
// This is invalid (during instantiation) in C++03 because B::foo
// resolves to the using decl in B, which is not a base class of D<T>.
// We can't diagnose it immediately because C<T> is an unknown
// specialization. The UsingShadowDecl in D<T> then points directly
// to A::foo, which will look well-formed when we instantiate.
// The right solution is to not collapse the shadow-decl chain.
if (!getLangOpts().CPlusPlus11 && CurContext->isRecord()) {
DeclContext *OrigDC = Orig->getDeclContext();
// Handle enums and anonymous structs.
if (isa<EnumDecl>(OrigDC)) OrigDC = OrigDC->getParent();
CXXRecordDecl *OrigRec = cast<CXXRecordDecl>(OrigDC);
while (OrigRec->isAnonymousStructOrUnion())
OrigRec = cast<CXXRecordDecl>(OrigRec->getDeclContext());
if (cast<CXXRecordDecl>(CurContext)->isProvablyNotDerivedFrom(OrigRec)) {
if (OrigDC == CurContext) {
Diag(Using->getLocation(),
diag::err_using_decl_nested_name_specifier_is_current_class)
<< Using->getQualifierLoc().getSourceRange();
Diag(Orig->getLocation(), diag::note_using_decl_target);
Using->setInvalidDecl();
return true;
}
Diag(Using->getQualifierLoc().getBeginLoc(),
diag::err_using_decl_nested_name_specifier_is_not_base_class)
<< Using->getQualifier()
<< cast<CXXRecordDecl>(CurContext)
<< Using->getQualifierLoc().getSourceRange();
Diag(Orig->getLocation(), diag::note_using_decl_target);
Using->setInvalidDecl();
return true;
}
}
if (Previous.empty()) return false;
NamedDecl *Target = Orig;
if (isa<UsingShadowDecl>(Target))
Target = cast<UsingShadowDecl>(Target)->getTargetDecl();
// If the target happens to be one of the previous declarations, we
// don't have a conflict.
//
// FIXME: but we might be increasing its access, in which case we
// should redeclare it.
NamedDecl *NonTag = nullptr, *Tag = nullptr;
bool FoundEquivalentDecl = false;
for (LookupResult::iterator I = Previous.begin(), E = Previous.end();
I != E; ++I) {
NamedDecl *D = (*I)->getUnderlyingDecl();
// We can have UsingDecls in our Previous results because we use the same
// LookupResult for checking whether the UsingDecl itself is a valid
// redeclaration.
if (isa<UsingDecl>(D) || isa<UsingPackDecl>(D))
continue;
if (IsEquivalentForUsingDecl(Context, D, Target)) {
if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(*I))
PrevShadow = Shadow;
FoundEquivalentDecl = true;
} else if (isEquivalentInternalLinkageDeclaration(D, Target)) {
// We don't conflict with an existing using shadow decl of an equivalent
// declaration, but we're not a redeclaration of it.
FoundEquivalentDecl = true;
}
if (isVisible(D))
(isa<TagDecl>(D) ? Tag : NonTag) = D;
}
if (FoundEquivalentDecl)
return false;
if (FunctionDecl *FD = Target->getAsFunction()) {
NamedDecl *OldDecl = nullptr;
switch (CheckOverload(nullptr, FD, Previous, OldDecl,
/*IsForUsingDecl*/ true)) {
case Ovl_Overload:
return false;
case Ovl_NonFunction:
Diag(Using->getLocation(), diag::err_using_decl_conflict);
break;
// We found a decl with the exact signature.
case Ovl_Match:
// If we're in a record, we want to hide the target, so we
// return true (without a diagnostic) to tell the caller not to
// build a shadow decl.
if (CurContext->isRecord())
return true;
// If we're not in a record, this is an error.
Diag(Using->getLocation(), diag::err_using_decl_conflict);
break;
}
Diag(Target->getLocation(), diag::note_using_decl_target);
Diag(OldDecl->getLocation(), diag::note_using_decl_conflict);
Using->setInvalidDecl();
return true;
}
// Target is not a function.
if (isa<TagDecl>(Target)) {
// No conflict between a tag and a non-tag.
if (!Tag) return false;
Diag(Using->getLocation(), diag::err_using_decl_conflict);
Diag(Target->getLocation(), diag::note_using_decl_target);
Diag(Tag->getLocation(), diag::note_using_decl_conflict);
Using->setInvalidDecl();
return true;
}
// No conflict between a tag and a non-tag.
if (!NonTag) return false;
Diag(Using->getLocation(), diag::err_using_decl_conflict);
Diag(Target->getLocation(), diag::note_using_decl_target);
Diag(NonTag->getLocation(), diag::note_using_decl_conflict);
Using->setInvalidDecl();
return true;
}
/// Determine whether a direct base class is a virtual base class.
static bool isVirtualDirectBase(CXXRecordDecl *Derived, CXXRecordDecl *Base) {
if (!Derived->getNumVBases())
return false;
for (auto &B : Derived->bases())
if (B.getType()->getAsCXXRecordDecl() == Base)
return B.isVirtual();
llvm_unreachable("not a direct base class");
}
/// Builds a shadow declaration corresponding to a 'using' declaration.
UsingShadowDecl *Sema::BuildUsingShadowDecl(Scope *S,
UsingDecl *UD,
NamedDecl *Orig,
UsingShadowDecl *PrevDecl) {
// If we resolved to another shadow declaration, just coalesce them.
NamedDecl *Target = Orig;
if (isa<UsingShadowDecl>(Target)) {
Target = cast<UsingShadowDecl>(Target)->getTargetDecl();
assert(!isa<UsingShadowDecl>(Target) && "nested shadow declaration");
}
NamedDecl *NonTemplateTarget = Target;
if (auto *TargetTD = dyn_cast<TemplateDecl>(Target))
NonTemplateTarget = TargetTD->getTemplatedDecl();
UsingShadowDecl *Shadow;
if (isa<CXXConstructorDecl>(NonTemplateTarget)) {
bool IsVirtualBase =
isVirtualDirectBase(cast<CXXRecordDecl>(CurContext),
UD->getQualifier()->getAsRecordDecl());
Shadow = ConstructorUsingShadowDecl::Create(
Context, CurContext, UD->getLocation(), UD, Orig, IsVirtualBase);
} else {
Shadow = UsingShadowDecl::Create(Context, CurContext, UD->getLocation(), UD,
Target);
}
UD->addShadowDecl(Shadow);
Shadow->setAccess(UD->getAccess());
if (Orig->isInvalidDecl() || UD->isInvalidDecl())
Shadow->setInvalidDecl();
Shadow->setPreviousDecl(PrevDecl);
if (S)
PushOnScopeChains(Shadow, S);
else
CurContext->addDecl(Shadow);
return Shadow;
}
/// Hides a using shadow declaration. This is required by the current
/// using-decl implementation when a resolvable using declaration in a
/// class is followed by a declaration which would hide or override
/// one or more of the using decl's targets; for example:
///
/// struct Base { void foo(int); };
/// struct Derived : Base {
/// using Base::foo;
/// void foo(int);
/// };
///
/// The governing language is C++03 [namespace.udecl]p12:
///
/// When a using-declaration brings names from a base class into a
/// derived class scope, member functions in the derived class
/// override and/or hide member functions with the same name and
/// parameter types in a base class (rather than conflicting).
///
/// There are two ways to implement this:
/// (1) optimistically create shadow decls when they're not hidden
/// by existing declarations, or
/// (2) don't create any shadow decls (or at least don't make them
/// visible) until we've fully parsed/instantiated the class.
/// The problem with (1) is that we might have to retroactively remove
/// a shadow decl, which requires several O(n) operations because the
/// decl structures are (very reasonably) not designed for removal.
/// (2) avoids this but is very fiddly and phase-dependent.
void Sema::HideUsingShadowDecl(Scope *S, UsingShadowDecl *Shadow) {
if (Shadow->getDeclName().getNameKind() ==
DeclarationName::CXXConversionFunctionName)
cast<CXXRecordDecl>(Shadow->getDeclContext())->removeConversion(Shadow);
// Remove it from the DeclContext...
Shadow->getDeclContext()->removeDecl(Shadow);
// ...and the scope, if applicable...
if (S) {
S->RemoveDecl(Shadow);
IdResolver.RemoveDecl(Shadow);
}
// ...and the using decl.
Shadow->getUsingDecl()->removeShadowDecl(Shadow);
// TODO: complain somehow if Shadow was used. It shouldn't
// be possible for this to happen, because...?
}
/// Find the base specifier for a base class with the given type.
static CXXBaseSpecifier *findDirectBaseWithType(CXXRecordDecl *Derived,
QualType DesiredBase,
bool &AnyDependentBases) {
// Check whether the named type is a direct base class.
CanQualType CanonicalDesiredBase = DesiredBase->getCanonicalTypeUnqualified();
for (auto &Base : Derived->bases()) {
CanQualType BaseType = Base.getType()->getCanonicalTypeUnqualified();
if (CanonicalDesiredBase == BaseType)
return &Base;
if (BaseType->isDependentType())
AnyDependentBases = true;
}
return nullptr;
}
namespace {
class UsingValidatorCCC : public CorrectionCandidateCallback {
public:
UsingValidatorCCC(bool HasTypenameKeyword, bool IsInstantiation,
NestedNameSpecifier *NNS, CXXRecordDecl *RequireMemberOf)
: HasTypenameKeyword(HasTypenameKeyword),
IsInstantiation(IsInstantiation), OldNNS(NNS),
RequireMemberOf(RequireMemberOf) {}
bool ValidateCandidate(const TypoCorrection &Candidate) override {
NamedDecl *ND = Candidate.getCorrectionDecl();
// Keywords are not valid here.
if (!ND || isa<NamespaceDecl>(ND))
return false;
// Completely unqualified names are invalid for a 'using' declaration.
if (Candidate.WillReplaceSpecifier() && !Candidate.getCorrectionSpecifier())
return false;
// FIXME: Don't correct to a name that CheckUsingDeclRedeclaration would
// reject.
if (RequireMemberOf) {
auto *FoundRecord = dyn_cast<CXXRecordDecl>(ND);
if (FoundRecord && FoundRecord->isInjectedClassName()) {
// No-one ever wants a using-declaration to name an injected-class-name
// of a base class, unless they're declaring an inheriting constructor.
ASTContext &Ctx = ND->getASTContext();
if (!Ctx.getLangOpts().CPlusPlus11)
return false;
QualType FoundType = Ctx.getRecordType(FoundRecord);
// Check that the injected-class-name is named as a member of its own
// type; we don't want to suggest 'using Derived::Base;', since that
// means something else.
NestedNameSpecifier *Specifier =
Candidate.WillReplaceSpecifier()
? Candidate.getCorrectionSpecifier()
: OldNNS;
if (!Specifier->getAsType() ||
!Ctx.hasSameType(QualType(Specifier->getAsType(), 0), FoundType))
return false;
// Check that this inheriting constructor declaration actually names a
// direct base class of the current class.
bool AnyDependentBases = false;
if (!findDirectBaseWithType(RequireMemberOf,
Ctx.getRecordType(FoundRecord),
AnyDependentBases) &&
!AnyDependentBases)
return false;
} else {
auto *RD = dyn_cast<CXXRecordDecl>(ND->getDeclContext());
if (!RD || RequireMemberOf->isProvablyNotDerivedFrom(RD))
return false;
// FIXME: Check that the base class member is accessible?
}
} else {
auto *FoundRecord = dyn_cast<CXXRecordDecl>(ND);
if (FoundRecord && FoundRecord->isInjectedClassName())
return false;
}
if (isa<TypeDecl>(ND))
return HasTypenameKeyword || !IsInstantiation;
return !HasTypenameKeyword;
}
private:
bool HasTypenameKeyword;
bool IsInstantiation;
NestedNameSpecifier *OldNNS;
CXXRecordDecl *RequireMemberOf;
};
} // end anonymous namespace
/// Builds a using declaration.
///
/// \param IsInstantiation - Whether this call arises from an
/// instantiation of an unresolved using declaration. We treat
/// the lookup differently for these declarations.
NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
SourceLocation UsingLoc,
bool HasTypenameKeyword,
SourceLocation TypenameLoc,
CXXScopeSpec &SS,
DeclarationNameInfo NameInfo,
SourceLocation EllipsisLoc,
AttributeList *AttrList,
bool IsInstantiation) {
assert(!SS.isInvalid() && "Invalid CXXScopeSpec.");
SourceLocation IdentLoc = NameInfo.getLoc();
assert(IdentLoc.isValid() && "Invalid TargetName location.");
// FIXME: We ignore attributes for now.
// For an inheriting constructor declaration, the name of the using
// declaration is the name of a constructor in this class, not in the
// base class.
DeclarationNameInfo UsingName = NameInfo;
if (UsingName.getName().getNameKind() == DeclarationName::CXXConstructorName)
if (auto *RD = dyn_cast<CXXRecordDecl>(CurContext))
UsingName.setName(Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(Context.getRecordType(RD))));
// Do the redeclaration lookup in the current scope.
LookupResult Previous(*this, UsingName, LookupUsingDeclName,
ForRedeclaration);
Previous.setHideTags(false);
if (S) {
LookupName(Previous, S);
// It is really dumb that we have to do this.
LookupResult::Filter F = Previous.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (!isDeclInScope(D, CurContext, S))
F.erase();
// If we found a local extern declaration that's not ordinarily visible,
// and this declaration is being added to a non-block scope, ignore it.
// We're only checking for scope conflicts here, not also for violations
// of the linkage rules.
else if (!CurContext->isFunctionOrMethod() && D->isLocalExternDecl() &&
!(D->getIdentifierNamespace() & Decl::IDNS_Ordinary))
F.erase();
}
F.done();
} else {
assert(IsInstantiation && "no scope in non-instantiation");
if (CurContext->isRecord())
LookupQualifiedName(Previous, CurContext);
else {
// No redeclaration check is needed here; in non-member contexts we
// diagnosed all possible conflicts with other using-declarations when
// building the template:
//
// For a dependent non-type using declaration, the only valid case is
// if we instantiate to a single enumerator. We check for conflicts
// between shadow declarations we introduce, and we check in the template
// definition for conflicts between a non-type using declaration and any
// other declaration, which together covers all cases.
//
// A dependent typename using declaration will never successfully
// instantiate, since it will always name a class member, so we reject
// that in the template definition.
}
}
// Check for invalid redeclarations.
if (CheckUsingDeclRedeclaration(UsingLoc, HasTypenameKeyword,
SS, IdentLoc, Previous))
return nullptr;
// Check for bad qualifiers.
if (CheckUsingDeclQualifier(UsingLoc, HasTypenameKeyword, SS, NameInfo,
IdentLoc))
return nullptr;
DeclContext *LookupContext = computeDeclContext(SS);
NamedDecl *D;
NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context);
if (!LookupContext || EllipsisLoc.isValid()) {
if (HasTypenameKeyword) {
// FIXME: not all declaration name kinds are legal here
D = UnresolvedUsingTypenameDecl::Create(Context, CurContext,
UsingLoc, TypenameLoc,
QualifierLoc,
IdentLoc, NameInfo.getName(),
EllipsisLoc);
} else {
D = UnresolvedUsingValueDecl::Create(Context, CurContext, UsingLoc,
QualifierLoc, NameInfo, EllipsisLoc);
}
D->setAccess(AS);
CurContext->addDecl(D);
return D;
}
auto Build = [&](bool Invalid) {
UsingDecl *UD =
UsingDecl::Create(Context, CurContext, UsingLoc, QualifierLoc,
UsingName, HasTypenameKeyword);
UD->setAccess(AS);
CurContext->addDecl(UD);
UD->setInvalidDecl(Invalid);
return UD;
};
auto BuildInvalid = [&]{ return Build(true); };
auto BuildValid = [&]{ return Build(false); };
if (RequireCompleteDeclContext(SS, LookupContext))
return BuildInvalid();
// Look up the target name.
LookupResult R(*this, NameInfo, LookupOrdinaryName);
// Unlike most lookups, we don't always want to hide tag
// declarations: tag names are visible through the using declaration
// even if hidden by ordinary names, *except* in a dependent context
// where it's important for the sanity of two-phase lookup.
if (!IsInstantiation)
R.setHideTags(false);
// For the purposes of this lookup, we have a base object type
// equal to that of the current context.
if (CurContext->isRecord()) {
R.setBaseObjectType(
Context.getTypeDeclType(cast<CXXRecordDecl>(CurContext)));
}
LookupQualifiedName(R, LookupContext);
// Try to correct typos if possible. If constructor name lookup finds no
// results, that means the named class has no explicit constructors, and we
// suppressed declaring implicit ones (probably because it's dependent or
// invalid).
if (R.empty() &&
NameInfo.getName().getNameKind() != DeclarationName::CXXConstructorName) {
// HACK: Work around a bug in libstdc++'s detection of ::gets. Sometimes
// it will believe that glibc provides a ::gets in cases where it does not,
// and will try to pull it into namespace std with a using-declaration.
// Just ignore the using-declaration in that case.
auto *II = NameInfo.getName().getAsIdentifierInfo();
if (getLangOpts().CPlusPlus14 && II && II->isStr("gets") &&
CurContext->isStdNamespace() &&
isa<TranslationUnitDecl>(LookupContext) &&
getSourceManager().isInSystemHeader(UsingLoc))
return nullptr;
if (TypoCorrection Corrected = CorrectTypo(
R.getLookupNameInfo(), R.getLookupKind(), S, &SS,
llvm::make_unique<UsingValidatorCCC>(
HasTypenameKeyword, IsInstantiation, SS.getScopeRep(),
dyn_cast<CXXRecordDecl>(CurContext)),
CTK_ErrorRecovery)) {
// We reject candidates where DroppedSpecifier == true, hence the
// literal '0' below.
diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest)
<< NameInfo.getName() << LookupContext << 0
<< SS.getRange());
// If we picked a correction with no attached Decl we can't do anything
// useful with it, bail out.
NamedDecl *ND = Corrected.getCorrectionDecl();
if (!ND)
return BuildInvalid();
// If we corrected to an inheriting constructor, handle it as one.
auto *RD = dyn_cast<CXXRecordDecl>(ND);
if (RD && RD->isInjectedClassName()) {
// The parent of the injected class name is the class itself.
RD = cast<CXXRecordDecl>(RD->getParent());
// Fix up the information we'll use to build the using declaration.
if (Corrected.WillReplaceSpecifier()) {
NestedNameSpecifierLocBuilder Builder;
Builder.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
QualifierLoc.getSourceRange());
QualifierLoc = Builder.getWithLocInContext(Context);
}
// In this case, the name we introduce is the name of a derived class
// constructor.
auto *CurClass = cast<CXXRecordDecl>(CurContext);
UsingName.setName(Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(Context.getRecordType(CurClass))));
UsingName.setNamedTypeInfo(nullptr);
for (auto *Ctor : LookupConstructors(RD))
R.addDecl(Ctor);
R.resolveKind();
} else {
// FIXME: Pick up all the declarations if we found an overloaded
// function.
UsingName.setName(ND->getDeclName());
R.addDecl(ND);
}
} else {
Diag(IdentLoc, diag::err_no_member)
<< NameInfo.getName() << LookupContext << SS.getRange();
return BuildInvalid();
}
}
if (R.isAmbiguous())
return BuildInvalid();
if (HasTypenameKeyword) {
// If we asked for a typename and got a non-type decl, error out.
if (!R.getAsSingle<TypeDecl>()) {
Diag(IdentLoc, diag::err_using_typename_non_type);
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I)
Diag((*I)->getUnderlyingDecl()->getLocation(),
diag::note_using_decl_target);
return BuildInvalid();
}
} else {
// If we asked for a non-typename and we got a type, error out,
// but only if this is an instantiation of an unresolved using
// decl. Otherwise just silently find the type name.
if (IsInstantiation && R.getAsSingle<TypeDecl>()) {
Diag(IdentLoc, diag::err_using_dependent_value_is_type);
Diag(R.getFoundDecl()->getLocation(), diag::note_using_decl_target);
return BuildInvalid();
}
}
// C++14 [namespace.udecl]p6:
// A using-declaration shall not name a namespace.
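// For illustration:
//   namespace N { namespace M { } }
//   using N::M;  // error: a using-declaration cannot name a namespace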
if (R.getAsSingle<NamespaceDecl>()) {
Diag(IdentLoc, diag::err_using_decl_can_not_refer_to_namespace)
<< SS.getRange();
return BuildInvalid();
}
// C++14 [namespace.udecl]p7:
// A using-declaration shall not name a scoped enumerator.
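// For illustration:
//   enum class E { e };
//   using E::e;  // error in C++14: 'e' is a scoped enumerator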
if (auto *ED = R.getAsSingle<EnumConstantDecl>()) {
if (cast<EnumDecl>(ED->getDeclContext())->isScoped()) {
Diag(IdentLoc, diag::err_using_decl_can_not_refer_to_scoped_enum)
<< SS.getRange();
return BuildInvalid();
}
}
UsingDecl *UD = BuildValid();
// Some additional rules apply to inheriting constructors.
if (UsingName.getName().getNameKind() ==
DeclarationName::CXXConstructorName) {
// Suppress access diagnostics; the access check is instead performed at the
// point of use for an inheriting constructor.
R.suppressDiagnostics();
if (CheckInheritingConstructorUsingDecl(UD))
return UD;
}
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I) {
UsingShadowDecl *PrevDecl = nullptr;
if (!CheckUsingShadowDecl(UD, *I, Previous, PrevDecl))
BuildUsingShadowDecl(S, UD, *I, PrevDecl);
}
return UD;
}
NamedDecl *Sema::BuildUsingPackDecl(NamedDecl *InstantiatedFrom,
ArrayRef<NamedDecl *> Expansions) {
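// For example (illustrative), this models a C++17 pack expansion such as
//   template <typename ...Ts> struct Overloaded : Ts... {
//     using Ts::operator()...;
//   };
// which instantiates to one expansion per element of the pack.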
assert(isa<UnresolvedUsingValueDecl>(InstantiatedFrom) ||
isa<UnresolvedUsingTypenameDecl>(InstantiatedFrom) ||
isa<UsingPackDecl>(InstantiatedFrom));
auto *UPD =
UsingPackDecl::Create(Context, CurContext, InstantiatedFrom, Expansions);
UPD->setAccess(InstantiatedFrom->getAccess());
CurContext->addDecl(UPD);
return UPD;
}
/// Additional checks for a using declaration referring to a constructor name.
bool Sema::CheckInheritingConstructorUsingDecl(UsingDecl *UD) {
assert(!UD->hasTypename() && "expecting a constructor name");
const Type *SourceType = UD->getQualifier()->getAsType();
assert(SourceType &&
"Using decl naming constructor doesn't have type in scope spec.");
CXXRecordDecl *TargetClass = cast<CXXRecordDecl>(CurContext);
// Check whether the named type is a direct base class.
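// For example (illustrative): given 'struct A { A(int); };' and
// 'struct B : A {};', the declaration 'struct C : B { using A::A; };' is
// rejected because A is a base of C but not a direct base.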
bool AnyDependentBases = false;
auto *Base = findDirectBaseWithType(TargetClass, QualType(SourceType, 0),
AnyDependentBases);
if (!Base && !AnyDependentBases) {
Diag(UD->getUsingLoc(),
diag::err_using_decl_constructor_not_in_direct_base)
<< UD->getNameInfo().getSourceRange()
<< QualType(SourceType, 0) << TargetClass;
UD->setInvalidDecl();
return true;
}
if (Base)
Base->setInheritConstructors();
return false;
}
/// Checks that the given using declaration is not an invalid
/// redeclaration. Note that this is checking only for the using decl
/// itself, not for any ill-formedness among the UsingShadowDecls.
bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
bool HasTypenameKeyword,
const CXXScopeSpec &SS,
SourceLocation NameLoc,
const LookupResult &Prev) {
NestedNameSpecifier *Qual = SS.getScopeRep();
// C++03 [namespace.udecl]p8:
// C++0x [namespace.udecl]p10:
// A using-declaration is a declaration and can therefore be used
// repeatedly where (and only where) multiple declarations are
// allowed.
//
// That's in non-member contexts.
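// For example: at namespace scope 'using N::x;' may be written twice, the
// second being a harmless redeclaration, whereas within a class the same
// using-declaration may appear only once.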
if (!CurContext->getRedeclContext()->isRecord()) {
// A dependent qualifier outside a class can only ever resolve to an
// enumeration type. Therefore it conflicts with any other non-type
// declaration in the same scope.
// FIXME: How should we check for dependent type-type conflicts at block
// scope?
if (Qual->isDependent() && !HasTypenameKeyword) {
for (auto *D : Prev) {
if (!isa<TypeDecl>(D) && !isa<UsingDecl>(D) && !isa<UsingPackDecl>(D)) {
bool OldCouldBeEnumerator =
isa<UnresolvedUsingValueDecl>(D) || isa<EnumConstantDecl>(D);
Diag(NameLoc,
OldCouldBeEnumerator ? diag::err_redefinition
: diag::err_redefinition_different_kind)
<< Prev.getLookupName();
Diag(D->getLocation(), diag::note_previous_definition);
return true;
}
}
}
return false;
}
for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) {
NamedDecl *D = *I;
bool DTypename;
NestedNameSpecifier *DQual;
if (UsingDecl *UD = dyn_cast<UsingDecl>(D)) {
DTypename = UD->hasTypename();
DQual = UD->getQualifier();
} else if (UnresolvedUsingValueDecl *UD
= dyn_cast<UnresolvedUsingValueDecl>(D)) {
DTypename = false;
DQual = UD->getQualifier();
} else if (UnresolvedUsingTypenameDecl *UD
= dyn_cast<UnresolvedUsingTypenameDecl>(D)) {
DTypename = true;
DQual = UD->getQualifier();
} else continue;
// using decls differ if one says 'typename' and the other doesn't.
// FIXME: non-dependent using decls?
if (HasTypenameKeyword != DTypename) continue;
// using decls differ if they name different scopes (but note that
// template instantiation can cause this check to trigger when it
// didn't before instantiation).
if (Context.getCanonicalNestedNameSpecifier(Qual) !=
Context.getCanonicalNestedNameSpecifier(DQual))
continue;
Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange();
Diag(D->getLocation(), diag::note_using_decl) << 1;
return true;
}
return false;
}
/// Checks that the given nested-name qualifier used in a using decl
/// in the current context is appropriately related to the current
/// scope. If an error is found, diagnoses it and returns true.
bool Sema::CheckUsingDeclQualifier(SourceLocation UsingLoc,
bool HasTypename,
const CXXScopeSpec &SS,
const DeclarationNameInfo &NameInfo,
SourceLocation NameLoc) {
DeclContext *NamedContext = computeDeclContext(SS);
if (!CurContext->isRecord()) {
// C++03 [namespace.udecl]p3:
// C++0x [namespace.udecl]p8:
// A using-declaration for a class member shall be a member-declaration.
// If we weren't able to compute a valid scope, it might validly be a
// dependent class scope or the scope of a dependent unscoped enumeration.
// If we have a 'typename' keyword, the scope must resolve to a class type.
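// For example (illustrative): given 'struct S { static int x; };', the
// namespace-scope declaration 'using S::x;' is ill-formed; the workarounds
// suggested below (e.g. 'auto &x = S::x;' in C++11) have the same effect.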
if ((HasTypename && !NamedContext) ||
(NamedContext && NamedContext->getRedeclContext()->isRecord())) {
auto *RD = NamedContext
? cast<CXXRecordDecl>(NamedContext->getRedeclContext())
: nullptr;
if (RD && RequireCompleteDeclContext(const_cast<CXXScopeSpec&>(SS), RD))
RD = nullptr;
Diag(NameLoc, diag::err_using_decl_can_not_refer_to_class_member)
<< SS.getRange();
// If we have a complete, non-dependent source type, try to suggest a
// way to get the same effect.
if (!RD)
return true;
// Find what this using-declaration was referring to.
LookupResult R(*this, NameInfo, LookupOrdinaryName);
R.setHideTags(false);
R.suppressDiagnostics();
LookupQualifiedName(R, RD);
if (R.getAsSingle<TypeDecl>()) {
if (getLangOpts().CPlusPlus11) {
// Convert 'using X::Y;' to 'using Y = X::Y;'.
Diag(SS.getBeginLoc(), diag::note_using_decl_class_member_workaround)
<< 0 // alias declaration
<< FixItHint::CreateInsertion(SS.getBeginLoc(),
NameInfo.getName().getAsString() +
" = ");
} else {
// Convert 'using X::Y;' to 'typedef X::Y Y;'.
SourceLocation InsertLoc =
getLocForEndOfToken(NameInfo.getLocEnd());
Diag(InsertLoc, diag::note_using_decl_class_member_workaround)
<< 1 // typedef declaration
<< FixItHint::CreateReplacement(UsingLoc, "typedef")
<< FixItHint::CreateInsertion(
InsertLoc, " " + NameInfo.getName().getAsString());
}
} else if (R.getAsSingle<VarDecl>()) {
// Don't provide a fixit outside C++11 mode; we don't want to suggest
// repeating the type of the static data member here.
FixItHint FixIt;
if (getLangOpts().CPlusPlus11) {
// Convert 'using X::Y;' to 'auto &Y = X::Y;'.
FixIt = FixItHint::CreateReplacement(
UsingLoc, "auto &" + NameInfo.getName().getAsString() + " = ");
}
Diag(UsingLoc, diag::note_using_decl_class_member_workaround)
<< 2 // reference declaration
<< FixIt;
} else if (R.getAsSingle<EnumConstantDecl>()) {
// Don't provide a fixit outside C++11 mode; we don't want to suggest
// repeating the type of the enumeration here, and we can't do so if
// the type is anonymous.
FixItHint FixIt;
if (getLangOpts().CPlusPlus11) {
// Convert 'using X::Y;' to 'auto &Y = X::Y;'.
FixIt = FixItHint::CreateReplacement(
UsingLoc,
"constexpr auto " + NameInfo.getName().getAsString() + " = ");
}
Diag(UsingLoc, diag::note_using_decl_class_member_workaround)
<< (getLangOpts().CPlusPlus11 ? 4 : 3) // const[expr] variable
<< FixIt;
}
return true;
}
// Otherwise, this might be valid.
return false;
}
// The current scope is a record.
// If the named context is dependent, we can't decide much.
if (!NamedContext) {
// FIXME: in C++0x, we can diagnose if we can prove that the
// nested-name-specifier does not refer to a base class, which is
// still possible in some cases.
// Otherwise we have to conservatively report that things might be
// okay.
return false;
}
if (!NamedContext->isRecord()) {
// Ideally this would point at the last name in the specifier,
// but we don't have that level of source info.
Diag(SS.getRange().getBegin(),
diag::err_using_decl_nested_name_specifier_is_not_class)
<< SS.getScopeRep() << SS.getRange();
return true;
}
if (!NamedContext->isDependentContext() &&
RequireCompleteDeclContext(const_cast<CXXScopeSpec&>(SS), NamedContext))
return true;
if (getLangOpts().CPlusPlus11) {
// C++11 [namespace.udecl]p3:
// In a using-declaration used as a member-declaration, the
// nested-name-specifier shall name a base class of the class
// being defined.
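// For example (illustrative): 'struct A {}; struct B { int y; };
// struct C : A { using B::y; };' is rejected because B is not a base of C.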
if (cast<CXXRecordDecl>(CurContext)->isProvablyNotDerivedFrom(
cast<CXXRecordDecl>(NamedContext))) {
if (CurContext == NamedContext) {
Diag(NameLoc,
diag::err_using_decl_nested_name_specifier_is_current_class)
<< SS.getRange();
return true;
}
if (!cast<CXXRecordDecl>(NamedContext)->isInvalidDecl()) {
Diag(SS.getRange().getBegin(),
diag::err_using_decl_nested_name_specifier_is_not_base_class)
<< SS.getScopeRep()
<< cast<CXXRecordDecl>(CurContext)
<< SS.getRange();
}
return true;
}
return false;
}
// C++03 [namespace.udecl]p4:
// A using-declaration used as a member-declaration shall refer
// to a member of a base class of the class being defined [etc.].
// Salient point: SS doesn't have to name a base class as long as
// lookup only finds members from base classes. Therefore we can
// diagnose here only if we can prove that that can't happen,
// i.e. if the class hierarchies provably don't intersect.
// TODO: it would be nice if "definitely valid" results were cached
// in the UsingDecl and UsingShadowDecl so that these checks didn't
// need to be repeated.
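// For example (illustrative): 'struct A { int x; }; struct C { using A::x; };'
// is diagnosed even under this rule, because the hierarchies of A and C
// provably do not intersect.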
llvm::SmallPtrSet<const CXXRecordDecl *, 4> Bases;
auto Collect = [&Bases](const CXXRecordDecl *Base) {
Bases.insert(Base);
return true;
};
// Collect all bases. Return false if we find a dependent base.
if (!cast<CXXRecordDecl>(CurContext)->forallBases(Collect))
return false;
// Returns true if the given base is not one of the accumulated base
// classes, i.e. the two hierarchies do not meet at this base.
auto IsNotBase = [&Bases](const CXXRecordDecl *Base) {
return !Bases.count(Base);
};
// Return false if the class has a dependent base or if it or one
// of its bases is present in the base set of the current context.
if (Bases.count(cast<CXXRecordDecl>(NamedContext)) ||
!cast<CXXRecordDecl>(NamedContext)->forallBases(IsNotBase))
return false;
Diag(SS.getRange().getBegin(),
diag::err_using_decl_nested_name_specifier_is_not_base_class)
<< SS.getScopeRep()
<< cast<CXXRecordDecl>(CurContext)
<< SS.getRange();
return true;
}
Decl *Sema::ActOnAliasDeclaration(Scope *S,
AccessSpecifier AS,
MultiTemplateParamsArg TemplateParamLists,
SourceLocation UsingLoc,
UnqualifiedId &Name,
AttributeList *AttrList,
TypeResult Type,
Decl *DeclFromDeclSpec) {
// Skip up to the relevant declaration scope.
while (S->isTemplateParamScope())
S = S->getParent();
assert((S->getFlags() & Scope::DeclScope) &&
"got alias-declaration outside of declaration scope");
if (Type.isInvalid())
return nullptr;
bool Invalid = false;
DeclarationNameInfo NameInfo = GetNameFromUnqualifiedId(Name);
TypeSourceInfo *TInfo = nullptr;
GetTypeFromParser(Type.get(), &TInfo);
if (DiagnoseClassNameShadow(CurContext, NameInfo))
return nullptr;
if (DiagnoseUnexpandedParameterPack(Name.StartLocation, TInfo,
UPPC_DeclarationType)) {
Invalid = true;
TInfo = Context.getTrivialTypeSourceInfo(Context.IntTy,
TInfo->getTypeLoc().getBeginLoc());
}
LookupResult Previous(*this, NameInfo, LookupOrdinaryName, ForRedeclaration);
LookupName(Previous, S);
// Warn about shadowing the name of a template parameter.
if (Previous.isSingleResult() &&
Previous.getFoundDecl()->isTemplateParameter()) {
DiagnoseTemplateParameterShadow(Name.StartLocation,Previous.getFoundDecl());
Previous.clear();
}
assert(Name.Kind == UnqualifiedId::IK_Identifier &&
"name in alias declaration must be an identifier");
TypeAliasDecl *NewTD = TypeAliasDecl::Create(Context, CurContext, UsingLoc,
Name.StartLocation,
Name.Identifier, TInfo);
NewTD->setAccess(AS);
if (Invalid)
NewTD->setInvalidDecl();
ProcessDeclAttributeList(S, NewTD, AttrList);
AddPragmaAttributes(S, NewTD);
CheckTypedefForVariablyModifiedType(S, NewTD);
Invalid |= NewTD->isInvalidDecl();
bool Redeclaration = false;
NamedDecl *NewND;
if (TemplateParamLists.size()) {
TypeAliasTemplateDecl *OldDecl = nullptr;
TemplateParameterList *OldTemplateParams = nullptr;
if (TemplateParamLists.size() != 1) {
Diag(UsingLoc, diag::err_alias_template_extra_headers)
<< SourceRange(TemplateParamLists[1]->getTemplateLoc(),
TemplateParamLists[TemplateParamLists.size()-1]->getRAngleLoc());
}
TemplateParameterList *TemplateParams = TemplateParamLists[0];
// Check that we can declare a template here.
if (CheckTemplateDeclScope(S, TemplateParams))
return nullptr;
// Only consider previous declarations in the same scope.
FilterLookupForScope(Previous, CurContext, S, /*ConsiderLinkage*/false,
/*ExplicitInstantiationOrSpecialization*/false);
if (!Previous.empty()) {
Redeclaration = true;
OldDecl = Previous.getAsSingle<TypeAliasTemplateDecl>();
if (!OldDecl && !Invalid) {
Diag(UsingLoc, diag::err_redefinition_different_kind)
<< Name.Identifier;
NamedDecl *OldD = Previous.getRepresentativeDecl();
if (OldD->getLocation().isValid())
Diag(OldD->getLocation(), diag::note_previous_definition);
Invalid = true;
}
if (!Invalid && OldDecl && !OldDecl->isInvalidDecl()) {
if (TemplateParameterListsAreEqual(TemplateParams,
OldDecl->getTemplateParameters(),
/*Complain=*/true,
TPL_TemplateMatch))
OldTemplateParams = OldDecl->getTemplateParameters();
else
Invalid = true;
TypeAliasDecl *OldTD = OldDecl->getTemplatedDecl();
if (!Invalid &&
!Context.hasSameType(OldTD->getUnderlyingType(),
NewTD->getUnderlyingType())) {
// FIXME: The C++0x standard does not clearly say this is ill-formed,
// but we can't reasonably accept it.
Diag(NewTD->getLocation(), diag::err_redefinition_different_typedef)
<< 2 << NewTD->getUnderlyingType() << OldTD->getUnderlyingType();
if (OldTD->getLocation().isValid())
Diag(OldTD->getLocation(), diag::note_previous_definition);
Invalid = true;
}
}
}
// Merge any previous default template arguments into our parameters,
// and check the parameter list.
if (CheckTemplateParameterList(TemplateParams, OldTemplateParams,
TPC_TypeAliasTemplate))
return nullptr;
TypeAliasTemplateDecl *NewDecl =
TypeAliasTemplateDecl::Create(Context, CurContext, UsingLoc,
Name.Identifier, TemplateParams,
NewTD);
NewTD->setDescribedAliasTemplate(NewDecl);
NewDecl->setAccess(AS);
if (Invalid)
NewDecl->setInvalidDecl();
else if (OldDecl)
NewDecl->setPreviousDecl(OldDecl);
NewND = NewDecl;
} else {
if (auto *TD = dyn_cast_or_null<TagDecl>(DeclFromDeclSpec)) {
setTagNameForLinkagePurposes(TD, NewTD);
handleTagNumbering(TD, S);
}
ActOnTypedefNameDecl(S, CurContext, NewTD, Previous, Redeclaration);
NewND = NewTD;
}
PushOnScopeChains(NewND, S);
ActOnDocumentableDecl(NewND);
return NewND;
}
Decl *Sema::ActOnNamespaceAliasDef(Scope *S, SourceLocation NamespaceLoc,
SourceLocation AliasLoc,
IdentifierInfo *Alias, CXXScopeSpec &SS,
SourceLocation IdentLoc,
IdentifierInfo *Ident) {
// Lookup the namespace name.
LookupResult R(*this, Ident, IdentLoc, LookupNamespaceName);
LookupParsedName(R, S, &SS);
if (R.isAmbiguous())
return nullptr;
if (R.empty()) {
if (!TryNamespaceTypoCorrection(*this, R, S, SS, IdentLoc, Ident)) {
Diag(IdentLoc, diag::err_expected_namespace_name) << SS.getRange();
return nullptr;
}
}
assert(!R.isAmbiguous() && !R.empty());
NamedDecl *ND = R.getRepresentativeDecl();
// Check if we have a previous declaration with the same name.
LookupResult PrevR(*this, Alias, AliasLoc, LookupOrdinaryName,
ForRedeclaration);
LookupName(PrevR, S);
// Check we're not shadowing a template parameter.
if (PrevR.isSingleResult() && PrevR.getFoundDecl()->isTemplateParameter()) {
DiagnoseTemplateParameterShadow(AliasLoc, PrevR.getFoundDecl());
PrevR.clear();
}
// Filter out any other lookup result from an enclosing scope.
FilterLookupForScope(PrevR, CurContext, S, /*ConsiderLinkage*/false,
/*AllowInlineNamespace*/false);
// Find the previous declaration and check that we can redeclare it.
NamespaceAliasDecl *Prev = nullptr;
if (PrevR.isSingleResult()) {
NamedDecl *PrevDecl = PrevR.getRepresentativeDecl();
if (NamespaceAliasDecl *AD = dyn_cast<NamespaceAliasDecl>(PrevDecl)) {
// We already have an alias with the same name that points to the same
// namespace; check that it matches.
if (AD->getNamespace()->Equals(getNamespaceDecl(ND))) {
Prev = AD;
} else if (isVisible(PrevDecl)) {
Diag(AliasLoc, diag::err_redefinition_different_namespace_alias)
<< Alias;
Diag(AD->getLocation(), diag::note_previous_namespace_alias)
<< AD->getNamespace();
return nullptr;
}
} else if (isVisible(PrevDecl)) {
unsigned DiagID = isa<NamespaceDecl>(PrevDecl->getUnderlyingDecl())
? diag::err_redefinition
: diag::err_redefinition_different_kind;
Diag(AliasLoc, DiagID) << Alias;
Diag(PrevDecl->getLocation(), diag::note_previous_definition);
return nullptr;
}
}
// The use of a nested name specifier may trigger deprecation warnings.
DiagnoseUseOfDecl(ND, IdentLoc);
NamespaceAliasDecl *AliasDecl =
NamespaceAliasDecl::Create(Context, CurContext, NamespaceLoc, AliasLoc,
Alias, SS.getWithLocInContext(Context),
IdentLoc, ND);
if (Prev)
AliasDecl->setPreviousDecl(Prev);
PushOnScopeChains(AliasDecl, S);
return AliasDecl;
}
namespace {
struct SpecialMemberExceptionSpecInfo
: SpecialMemberVisitor<SpecialMemberExceptionSpecInfo> {
SourceLocation Loc;
Sema::ImplicitExceptionSpecification ExceptSpec;
SpecialMemberExceptionSpecInfo(Sema &S, CXXMethodDecl *MD,
Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI,
SourceLocation Loc)
: SpecialMemberVisitor(S, MD, CSM, ICI), Loc(Loc), ExceptSpec(S) {}
bool visitBase(CXXBaseSpecifier *Base);
bool visitField(FieldDecl *FD);
void visitClassSubobject(CXXRecordDecl *Class, Subobject Subobj,
unsigned Quals);
void visitSubobjectCall(Subobject Subobj,
Sema::SpecialMemberOverloadResult SMOR);
};
}
bool SpecialMemberExceptionSpecInfo::visitBase(CXXBaseSpecifier *Base) {
auto *RT = Base->getType()->getAs<RecordType>();
if (!RT)
return false;
auto *BaseClass = cast<CXXRecordDecl>(RT->getDecl());
Sema::SpecialMemberOverloadResult SMOR = lookupInheritedCtor(BaseClass);
if (auto *BaseCtor = SMOR.getMethod()) {
visitSubobjectCall(Base, BaseCtor);
return false;
}
visitClassSubobject(BaseClass, Base, 0);
return false;
}
bool SpecialMemberExceptionSpecInfo::visitField(FieldDecl *FD) {
if (CSM == Sema::CXXDefaultConstructor && FD->hasInClassInitializer()) {
Expr *E = FD->getInClassInitializer();
if (!E)
// FIXME: It's a little wasteful to build and throw away a
// CXXDefaultInitExpr here.
// FIXME: We should have a single context note pointing at Loc, and
// this location should be MD->getLocation() instead, since that's
// the location where we actually use the default init expression.
E = S.BuildCXXDefaultInitExpr(Loc, FD).get();
if (E)
ExceptSpec.CalledExpr(E);
} else if (auto *RT = S.Context.getBaseElementType(FD->getType())
->getAs<RecordType>()) {
visitClassSubobject(cast<CXXRecordDecl>(RT->getDecl()), FD,
FD->getType().getCVRQualifiers());
}
return false;
}
void SpecialMemberExceptionSpecInfo::visitClassSubobject(CXXRecordDecl *Class,
Subobject Subobj,
unsigned Quals) {
FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
bool IsMutable = Field && Field->isMutable();
visitSubobjectCall(Subobj, lookupIn(Class, Quals, IsMutable));
}
void SpecialMemberExceptionSpecInfo::visitSubobjectCall(
Subobject Subobj, Sema::SpecialMemberOverloadResult SMOR) {
// Note, if lookup fails, it doesn't matter what exception specification we
// choose because the special member will be deleted.
if (CXXMethodDecl *MD = SMOR.getMethod())
ExceptSpec.CalledDecl(getSubobjectLoc(Subobj), MD);
}
static Sema::ImplicitExceptionSpecification
ComputeDefaultedSpecialMemberExceptionSpec(
Sema &S, SourceLocation Loc, CXXMethodDecl *MD, Sema::CXXSpecialMember CSM,
Sema::InheritedConstructorInfo *ICI) {
CXXRecordDecl *ClassDecl = MD->getParent();
// C++ [except.spec]p14:
// An implicitly declared special member function (Clause 12) shall have an
// exception-specification. [...]
SpecialMemberExceptionSpecInfo Info(S, MD, CSM, ICI, Loc);
if (ClassDecl->isInvalidDecl())
return Info.ExceptSpec;
// C++1z [except.spec]p7:
// [Look for exceptions thrown by] a constructor selected [...] to
// initialize a potentially constructed subobject,
// C++1z [except.spec]p8:
// The exception specification for an implicitly-declared destructor, or a
// destructor without a noexcept-specifier, is potentially-throwing if and
// only if any of the destructors for any of its potentially constructed
// subobjects is potentially throwing.
// FIXME: We respect the first rule but ignore the "potentially constructed"
// in the second rule to resolve a core issue (no number yet) that would have
// us reject:
// struct A { virtual void f() = 0; virtual ~A() noexcept(false) = 0; };
// struct B : A {};
// struct C : B { void f(); };
// ... due to giving B::~B() a non-throwing exception specification.
Info.visit(Info.IsConstructor ? Info.VisitPotentiallyConstructedBases
: Info.VisitAllBases);
return Info.ExceptSpec;
}
namespace {
/// RAII object to register a special member as being currently declared.
struct DeclaringSpecialMember {
Sema &S;
Sema::SpecialMemberDecl D;
Sema::ContextRAII SavedContext;
bool WasAlreadyBeingDeclared;
DeclaringSpecialMember(Sema &S, CXXRecordDecl *RD, Sema::CXXSpecialMember CSM)
: S(S), D(RD, CSM), SavedContext(S, RD) {
WasAlreadyBeingDeclared = !S.SpecialMembersBeingDeclared.insert(D).second;
if (WasAlreadyBeingDeclared)
// This almost never happens, but if it does, ensure that our cache
// doesn't contain a stale result.
S.SpecialMemberCache.clear();
else {
// Register a note to be produced if we encounter an error while
// declaring the special member.
Sema::CodeSynthesisContext Ctx;
Ctx.Kind = Sema::CodeSynthesisContext::DeclaringSpecialMember;
// FIXME: We don't have a location to use here. Using the class's
// location maintains the fiction that we declare all special members
// with the class, but (1) it's not clear that lying about that helps our
// users understand what's going on, and (2) there may be outer contexts
// on the stack (some of which are relevant) and printing them exposes
// our lies.
Ctx.PointOfInstantiation = RD->getLocation();
Ctx.Entity = RD;
Ctx.SpecialMember = CSM;
S.pushCodeSynthesisContext(Ctx);
}
}
~DeclaringSpecialMember() {
if (!WasAlreadyBeingDeclared) {
S.SpecialMembersBeingDeclared.erase(D);
S.popCodeSynthesisContext();
}
}
/// \brief Are we already trying to declare this special member?
bool isAlreadyBeingDeclared() const {
return WasAlreadyBeingDeclared;
}
};
}
void Sema::CheckImplicitSpecialMemberDeclaration(Scope *S, FunctionDecl *FD) {
// Look up any existing declarations, but don't trigger declaration of all
// implicit special members with this name.
DeclarationName Name = FD->getDeclName();
LookupResult R(*this, Name, SourceLocation(), LookupOrdinaryName,
ForRedeclaration);
for (auto *D : FD->getParent()->lookup(Name))
if (auto *Acceptable = R.getAcceptableDecl(D))
R.addDecl(Acceptable);
R.resolveKind();
R.suppressDiagnostics();
CheckFunctionDeclaration(S, FD, R, /*IsMemberSpecialization*/false);
}
CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
CXXRecordDecl *ClassDecl) {
// C++ [class.ctor]p5:
// A default constructor for a class X is a constructor of class X
// that can be called without an argument. If there is no
// user-declared constructor for class X, a default constructor is
// implicitly declared. An implicitly-declared default constructor
// is an inline public member of its class.
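// For example: 'struct S { int i; };' declares no constructor, so an
// implicit inline public 'S()' is declared through this path.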
assert(ClassDecl->needsImplicitDefaultConstructor() &&
"Should not build implicit default constructor!");
DeclaringSpecialMember DSM(*this, ClassDecl, CXXDefaultConstructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXDefaultConstructor,
false);
// Create the actual constructor declaration.
CanQualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationName Name
= Context.DeclarationNames.getCXXConstructorName(ClassType);
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXConstructorDecl *DefaultCon = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, /*Type*/QualType(),
/*TInfo=*/nullptr, /*isExplicit=*/false, /*isInline=*/true,
/*isImplicitlyDeclared=*/true, Constexpr);
DefaultCon->setAccess(AS_public);
DefaultCon->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXDefaultConstructor,
DefaultCon,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this constructor.
FunctionProtoType::ExtProtoInfo EPI = getImplicitMethodEPI(*this, DefaultCon);
DefaultCon->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
// We don't need to use SpecialMemberIsTrivial here; triviality for default
// constructors is easy to compute.
DefaultCon->setTrivial(ClassDecl->hasTrivialDefaultConstructor());
// Note that we have declared this constructor.
++ASTContext::NumImplicitDefaultConstructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, DefaultCon);
if (ShouldDeleteSpecialMember(DefaultCon, CXXDefaultConstructor))
SetDeclDeleted(DefaultCon, ClassLoc);
if (S)
PushOnScopeChains(DefaultCon, S, false);
ClassDecl->addDecl(DefaultCon);
return DefaultCon;
}
void Sema::DefineImplicitDefaultConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *Constructor) {
assert((Constructor->isDefaulted() && Constructor->isDefaultConstructor() &&
!Constructor->doesThisDeclarationHaveABody() &&
!Constructor->isDeleted()) &&
"DefineImplicitDefaultConstructor - call it for implicit default ctor");
if (Constructor->willHaveBody() || Constructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = Constructor->getParent();
assert(ClassDecl && "DefineImplicitDefaultConstructor - invalid constructor");
SynthesizedFunctionScope Scope(*this, Constructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
Constructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
if (SetCtorInitializers(Constructor, /*AnyErrors=*/false)) {
Constructor->setInvalidDecl();
return;
}
SourceLocation Loc = Constructor->getLocEnd().isValid()
? Constructor->getLocEnd()
: Constructor->getLocation();
Constructor->setBody(new (Context) CompoundStmt(Loc));
Constructor->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Constructor);
}
DiagnoseUninitializedFields(*this, Constructor);
}
void Sema::ActOnFinishDelayedMemberInitializers(Decl *D) {
// Perform any delayed checks on exception specifications.
CheckDelayedMemberExceptionSpecs();
}
/// Find or create the fake constructor we synthesize to model constructing an
/// object of a derived class via a constructor of a base class.
CXXConstructorDecl *
Sema::findInheritingConstructor(SourceLocation Loc,
CXXConstructorDecl *BaseCtor,
ConstructorUsingShadowDecl *Shadow) {
CXXRecordDecl *Derived = Shadow->getParent();
SourceLocation UsingLoc = Shadow->getLocation();
// FIXME: Add a new kind of DeclarationName for an inherited constructor.
// For now we use the name of the base class constructor as a member of the
// derived class to indicate a (fake) inherited constructor name.
DeclarationName Name = BaseCtor->getDeclName();
// Check to see if we already have a fake constructor for this inherited
// constructor call.
for (NamedDecl *Ctor : Derived->lookup(Name))
if (declaresSameEntity(cast<CXXConstructorDecl>(Ctor)
->getInheritedConstructor()
.getConstructor(),
BaseCtor))
return cast<CXXConstructorDecl>(Ctor);
DeclarationNameInfo NameInfo(Name, UsingLoc);
TypeSourceInfo *TInfo =
Context.getTrivialTypeSourceInfo(BaseCtor->getType(), UsingLoc);
FunctionProtoTypeLoc ProtoLoc =
TInfo->getTypeLoc().IgnoreParens().castAs<FunctionProtoTypeLoc>();
// Check the inherited constructor is valid and find the list of base classes
// from which it was inherited.
InheritedConstructorInfo ICI(*this, Loc, Shadow);
bool Constexpr =
BaseCtor->isConstexpr() &&
defaultedSpecialMemberIsConstexpr(*this, Derived, CXXDefaultConstructor,
false, BaseCtor, &ICI);
CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create(
Context, Derived, UsingLoc, NameInfo, TInfo->getType(), TInfo,
BaseCtor->isExplicit(), /*Inline=*/true,
/*ImplicitlyDeclared=*/true, Constexpr,
InheritedConstructor(Shadow, BaseCtor));
if (Shadow->isInvalidDecl())
DerivedCtor->setInvalidDecl();
// Build an unevaluated exception specification for this fake constructor.
const FunctionProtoType *FPT = TInfo->getType()->castAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = DerivedCtor;
DerivedCtor->setType(Context.getFunctionType(FPT->getReturnType(),
FPT->getParamTypes(), EPI));
// Build the parameter declarations.
SmallVector<ParmVarDecl *, 16> ParamDecls;
for (unsigned I = 0, N = FPT->getNumParams(); I != N; ++I) {
TypeSourceInfo *TInfo =
Context.getTrivialTypeSourceInfo(FPT->getParamType(I), UsingLoc);
ParmVarDecl *PD = ParmVarDecl::Create(
Context, DerivedCtor, UsingLoc, UsingLoc, /*IdentifierInfo=*/nullptr,
FPT->getParamType(I), TInfo, SC_None, /*DefaultArg=*/nullptr);
PD->setScopeInfo(0, I);
PD->setImplicit();
// Ensure attributes are propagated onto parameters (this matters for
// format, pass_object_size, ...).
mergeDeclAttributes(PD, BaseCtor->getParamDecl(I));
ParamDecls.push_back(PD);
ProtoLoc.setParam(I, PD);
}
// Set up the new constructor.
assert(!BaseCtor->isDeleted() && "should not use deleted constructor");
DerivedCtor->setAccess(BaseCtor->getAccess());
DerivedCtor->setParams(ParamDecls);
Derived->addDecl(DerivedCtor);
if (ShouldDeleteSpecialMember(DerivedCtor, CXXDefaultConstructor, &ICI))
SetDeclDeleted(DerivedCtor, UsingLoc);
return DerivedCtor;
}
void Sema::NoteDeletedInheritingConstructor(CXXConstructorDecl *Ctor) {
InheritedConstructorInfo ICI(*this, Ctor->getLocation(),
Ctor->getInheritedConstructor().getShadowDecl());
ShouldDeleteSpecialMember(Ctor, CXXDefaultConstructor, &ICI,
/*Diagnose*/true);
}
void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *Constructor) {
CXXRecordDecl *ClassDecl = Constructor->getParent();
assert(Constructor->getInheritedConstructor() &&
!Constructor->doesThisDeclarationHaveABody() &&
!Constructor->isDeleted());
if (Constructor->willHaveBody() || Constructor->isInvalidDecl())
return;
// Initializations are performed "as if by a defaulted default constructor",
// so enter the appropriate scope.
SynthesizedFunctionScope Scope(*this, Constructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
Constructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
ConstructorUsingShadowDecl *Shadow =
Constructor->getInheritedConstructor().getShadowDecl();
CXXConstructorDecl *InheritedCtor =
Constructor->getInheritedConstructor().getConstructor();
// [class.inhctor.init]p1:
// initialization proceeds as if a defaulted default constructor is used to
// initialize the D object and each base class subobject from which the
// constructor was inherited
InheritedConstructorInfo ICI(*this, CurrentLocation, Shadow);
CXXRecordDecl *RD = Shadow->getParent();
SourceLocation InitLoc = Shadow->getLocation();
// Build explicit initializers for all base classes from which the
// constructor was inherited.
SmallVector<CXXCtorInitializer*, 8> Inits;
for (bool VBase : {false, true}) {
for (CXXBaseSpecifier &B : VBase ? RD->vbases() : RD->bases()) {
if (B.isVirtual() != VBase)
continue;
auto *BaseRD = B.getType()->getAsCXXRecordDecl();
if (!BaseRD)
continue;
auto BaseCtor = ICI.findConstructorForBase(BaseRD, InheritedCtor);
if (!BaseCtor.first)
continue;
MarkFunctionReferenced(CurrentLocation, BaseCtor.first);
ExprResult Init = new (Context) CXXInheritedCtorInitExpr(
InitLoc, B.getType(), BaseCtor.first, VBase, BaseCtor.second);
auto *TInfo = Context.getTrivialTypeSourceInfo(B.getType(), InitLoc);
Inits.push_back(new (Context) CXXCtorInitializer(
Context, TInfo, VBase, InitLoc, Init.get(), InitLoc,
SourceLocation()));
}
}
// We now proceed as if for a defaulted default constructor, with the relevant
// initializers replaced.
if (SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits)) {
Constructor->setInvalidDecl();
return;
}
Constructor->setBody(new (Context) CompoundStmt(InitLoc));
Constructor->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Constructor);
}
DiagnoseUninitializedFields(*this, Constructor);
}
CXXDestructorDecl *Sema::DeclareImplicitDestructor(CXXRecordDecl *ClassDecl) {
// C++ [class.dtor]p2:
// If a class has no user-declared destructor, a destructor is
// declared implicitly. An implicitly-declared destructor is an
// inline public member of its class.
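// For example: a class 'struct S {};' with no user-declared destructor
// receives an implicitly-declared inline public '~S()' here.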
assert(ClassDecl->needsImplicitDestructor());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXDestructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
// Create the actual destructor declaration.
CanQualType ClassType
= Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationName Name
= Context.DeclarationNames.getCXXDestructorName(ClassType);
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXDestructorDecl *Destructor
= CXXDestructorDecl::Create(Context, ClassDecl, ClassLoc, NameInfo,
QualType(), nullptr, /*isInline=*/true,
/*isImplicitlyDeclared=*/true);
Destructor->setAccess(AS_public);
Destructor->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXDestructor,
Destructor,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this destructor.
FunctionProtoType::ExtProtoInfo EPI = getImplicitMethodEPI(*this, Destructor);
Destructor->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
// We don't need to use SpecialMemberIsTrivial here; triviality for
// destructors is easy to compute.
Destructor->setTrivial(ClassDecl->hasTrivialDestructor());
// Note that we have declared this destructor.
++ASTContext::NumImplicitDestructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, Destructor);
// We can't check whether an implicit destructor is deleted before we complete
// the definition of the class, because its validity depends on the alignment
// of the class. We'll check this from ActOnFields once the class is complete.
if (ClassDecl->isCompleteDefinition() &&
ShouldDeleteSpecialMember(Destructor, CXXDestructor))
SetDeclDeleted(Destructor, ClassLoc);
// Introduce this destructor into its scope.
if (S)
PushOnScopeChains(Destructor, S, false);
ClassDecl->addDecl(Destructor);
return Destructor;
}
void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation,
CXXDestructorDecl *Destructor) {
assert((Destructor->isDefaulted() &&
!Destructor->doesThisDeclarationHaveABody() &&
!Destructor->isDeleted()) &&
"DefineImplicitDestructor - call it for implicit default dtor");
if (Destructor->willHaveBody() || Destructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = Destructor->getParent();
assert(ClassDecl && "DefineImplicitDestructor - invalid destructor");
SynthesizedFunctionScope Scope(*this, Destructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
Destructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
Destructor->getParent());
if (CheckDestructor(Destructor)) {
Destructor->setInvalidDecl();
return;
}
SourceLocation Loc = Destructor->getLocEnd().isValid()
? Destructor->getLocEnd()
: Destructor->getLocation();
Destructor->setBody(new (Context) CompoundStmt(Loc));
Destructor->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Destructor);
}
}
/// \brief Perform any semantic analysis which needs to be delayed until all
/// pending class member declarations have been parsed.
void Sema::ActOnFinishCXXMemberDecls() {
// If the context is an invalid C++ class, just suppress these checks.
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(CurContext)) {
if (Record->isInvalidDecl()) {
DelayedDefaultedMemberExceptionSpecs.clear();
DelayedExceptionSpecChecks.clear();
return;
}
checkForMultipleExportedDefaultConstructors(*this, Record);
}
}
void Sema::ActOnFinishCXXNonNestedClass(Decl *D) {
referenceDLLExportedClassMethods();
}
void Sema::referenceDLLExportedClassMethods() {
if (!DelayedDllExportClasses.empty()) {
// Calling ReferenceDllExportedMethods might cause the current function to
// be called again, so use a local copy of DelayedDllExportClasses.
SmallVector<CXXRecordDecl *, 4> WorkList;
std::swap(DelayedDllExportClasses, WorkList);
for (CXXRecordDecl *Class : WorkList)
ReferenceDllExportedMethods(*this, Class);
}
}
void Sema::AdjustDestructorExceptionSpec(CXXRecordDecl *ClassDecl,
CXXDestructorDecl *Destructor) {
assert(getLangOpts().CPlusPlus11 &&
"adjusting dtor exception specs was introduced in c++11");
// C++11 [class.dtor]p3:
// A declaration of a destructor that does not have an exception-
// specification is implicitly considered to have the same exception-
// specification as an implicit declaration.
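// For example: in 'struct S { ~S(); };' the destructor is declared without
// an exception specification, so in C++11 it receives the one its implicit
// counterpart would have (here, non-throwing).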
const FunctionProtoType *DtorType = Destructor->getType()->
getAs<FunctionProtoType>();
if (DtorType->hasExceptionSpec())
return;
// Replace the destructor's type, building off the existing one. Fortunately,
// the only thing of interest in the destructor type is its extended info.
// The return and arguments are fixed.
FunctionProtoType::ExtProtoInfo EPI = DtorType->getExtProtoInfo();
EPI.ExceptionSpec.Type = EST_Unevaluated;
EPI.ExceptionSpec.SourceDecl = Destructor;
Destructor->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
// FIXME: If the destructor has a body that could throw, and the newly created
// spec doesn't allow exceptions, we should emit a warning, because this
// change in behavior can break conforming C++03 programs at runtime.
// However, we don't have a body or an exception specification yet, so it
// needs to be done somewhere else.
}
namespace {
/// \brief An abstract base class for all helper classes used in building the
/// copy/move operators. These classes serve as factory functions and help us
/// avoid using the same Expr* in the AST twice.
class ExprBuilder {
ExprBuilder(const ExprBuilder&) = delete;
ExprBuilder &operator=(const ExprBuilder&) = delete;
protected:
static Expr *assertNotNull(Expr *E) {
assert(E && "Expression construction must not fail.");
return E;
}
public:
ExprBuilder() {}
virtual ~ExprBuilder() {}
virtual Expr *build(Sema &S, SourceLocation Loc) const = 0;
};
class RefBuilder: public ExprBuilder {
VarDecl *Var;
QualType VarType;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.BuildDeclRefExpr(Var, VarType, VK_LValue, Loc).get());
}
RefBuilder(VarDecl *Var, QualType VarType)
: Var(Var), VarType(VarType) {}
};
class ThisBuilder: public ExprBuilder {
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.ActOnCXXThis(Loc).getAs<Expr>());
}
};
class CastBuilder: public ExprBuilder {
const ExprBuilder &Builder;
QualType Type;
ExprValueKind Kind;
const CXXCastPath &Path;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.ImpCastExprToType(Builder.build(S, Loc), Type,
CK_UncheckedDerivedToBase, Kind,
&Path).get());
}
CastBuilder(const ExprBuilder &Builder, QualType Type, ExprValueKind Kind,
const CXXCastPath &Path)
: Builder(Builder), Type(Type), Kind(Kind), Path(Path) {}
};
class DerefBuilder: public ExprBuilder {
const ExprBuilder &Builder;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(
S.CreateBuiltinUnaryOp(Loc, UO_Deref, Builder.build(S, Loc)).get());
}
DerefBuilder(const ExprBuilder &Builder) : Builder(Builder) {}
};
class MemberBuilder: public ExprBuilder {
const ExprBuilder &Builder;
QualType Type;
CXXScopeSpec SS;
bool IsArrow;
LookupResult &MemberLookup;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.BuildMemberReferenceExpr(
Builder.build(S, Loc), Type, Loc, IsArrow, SS, SourceLocation(),
nullptr, MemberLookup, nullptr, nullptr).get());
}
MemberBuilder(const ExprBuilder &Builder, QualType Type, bool IsArrow,
LookupResult &MemberLookup)
: Builder(Builder), Type(Type), IsArrow(IsArrow),
MemberLookup(MemberLookup) {}
};
class MoveCastBuilder: public ExprBuilder {
const ExprBuilder &Builder;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(CastForMoving(S, Builder.build(S, Loc)));
}
MoveCastBuilder(const ExprBuilder &Builder) : Builder(Builder) {}
};
class LvalueConvBuilder: public ExprBuilder {
const ExprBuilder &Builder;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(
S.DefaultLvalueConversion(Builder.build(S, Loc)).get());
}
LvalueConvBuilder(const ExprBuilder &Builder) : Builder(Builder) {}
};
class SubscriptBuilder: public ExprBuilder {
const ExprBuilder &Base;
const ExprBuilder &Index;
public:
Expr *build(Sema &S, SourceLocation Loc) const override {
return assertNotNull(S.CreateBuiltinArraySubscriptExpr(
Base.build(S, Loc), Loc, Index.build(S, Loc), Loc).get());
}
SubscriptBuilder(const ExprBuilder &Base, const ExprBuilder &Index)
: Base(Base), Index(Index) {}
};
} // end anonymous namespace
/// When generating a defaulted copy or move assignment operator, if a field
/// should be copied with __builtin_memcpy rather than via explicit assignments,
/// do so. This optimization only applies for arrays of scalars, and for arrays
/// of class type where the selected copy/move-assignment operator is trivial.
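///
/// For example (illustrative): a field 'int a[4];' is copied with a single
/// __builtin_memcpy of sizeof(int[4]) bytes instead of four element-wise
/// assignments.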
static StmtResult
buildMemcpyForAssignmentOp(Sema &S, SourceLocation Loc, QualType T,
const ExprBuilder &ToB, const ExprBuilder &FromB) {
// Compute the size of the memory buffer to be copied.
QualType SizeType = S.Context.getSizeType();
llvm::APInt Size(S.Context.getTypeSize(SizeType),
S.Context.getTypeSizeInChars(T).getQuantity());
// Take the address of the field references for "from" and "to". We
// directly construct UnaryOperators here because semantic analysis
// does not permit us to take the address of an xvalue.
Expr *From = FromB.build(S, Loc);
From = new (S.Context) UnaryOperator(From, UO_AddrOf,
S.Context.getPointerType(From->getType()),
VK_RValue, OK_Ordinary, Loc);
Expr *To = ToB.build(S, Loc);
To = new (S.Context) UnaryOperator(To, UO_AddrOf,
S.Context.getPointerType(To->getType()),
VK_RValue, OK_Ordinary, Loc);
const Type *E = T->getBaseElementTypeUnsafe();
bool NeedsCollectableMemCpy =
E->isRecordType() && E->getAs<RecordType>()->getDecl()->hasObjectMember();
// Create a reference to the __builtin_objc_memmove_collectable function
StringRef MemCpyName = NeedsCollectableMemCpy ?
"__builtin_objc_memmove_collectable" :
"__builtin_memcpy";
LookupResult R(S, &S.Context.Idents.get(MemCpyName), Loc,
Sema::LookupOrdinaryName);
S.LookupName(R, S.TUScope, true);
FunctionDecl *MemCpy = R.getAsSingle<FunctionDecl>();
if (!MemCpy)
// Something went horribly wrong earlier, and we will have complained
// about it.
return StmtError();
ExprResult MemCpyRef = S.BuildDeclRefExpr(MemCpy, S.Context.BuiltinFnTy,
VK_RValue, Loc, nullptr);
assert(MemCpyRef.isUsable() && "Builtin reference cannot fail");
Expr *CallArgs[] = {
To, From, IntegerLiteral::Create(S.Context, Size, SizeType, Loc)
};
ExprResult Call = S.ActOnCallExpr(/*Scope=*/nullptr, MemCpyRef.get(),
Loc, CallArgs, Loc);
assert(!Call.isInvalid() && "Call to __builtin_memcpy cannot fail!");
return Call.getAs<Stmt>();
}
/// \brief Builds a statement that copies/moves the given entity from \p From
/// to \p To.
///
/// This routine is used to copy/move the members of a class with an
/// implicitly-declared copy/move assignment operator. When the entities being
/// copied are arrays, this routine builds for loops to copy them.
///
/// \param S The Sema object used for type-checking.
///
/// \param Loc The location where the implicit copy/move is being generated.
///
/// \param T The type of the expressions being copied/moved. Both expressions
/// must have this type.
///
/// \param To The expression we are copying/moving to.
///
/// \param From The expression we are copying/moving from.
///
/// \param CopyingBaseSubobject Whether we're copying/moving a base subobject.
/// Otherwise, it's a non-static member subobject.
///
/// \param Copying Whether we're copying or moving.
///
/// \param Depth Internal parameter recording the depth of the recursion.
///
/// \returns A statement or a loop that copies the expressions, or StmtResult(0)
/// if a memcpy should be used instead.
static StmtResult
buildSingleCopyAssignRecursively(Sema &S, SourceLocation Loc, QualType T,
const ExprBuilder &To, const ExprBuilder &From,
bool CopyingBaseSubobject, bool Copying,
unsigned Depth = 0) {
// C++11 [class.copy]p28:
// Each subobject is assigned in the manner appropriate to its type:
//
// - if the subobject is of class type, as if by a call to operator= with
// the subobject as the object expression and the corresponding
// subobject of x as a single function argument (as if by explicit
// qualification; that is, ignoring any possible virtual overriding
// functions in more derived classes);
//
// C++03 [class.copy]p13:
// - if the subobject is of class type, the copy assignment operator for
// the class is used (as if by explicit qualification; that is,
// ignoring any possible virtual overriding functions in more derived
// classes);
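// For example (illustrative): even if a base B declares
// 'virtual B &operator=(const B &);', the synthesized assignment calls
// 'B::operator=' directly through this qualification, never an override in
// a more derived class.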
if (const RecordType *RecordTy = T->getAs<RecordType>()) {
CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
// Look for operator=.
DeclarationName Name
= S.Context.DeclarationNames.getCXXOperatorName(OO_Equal);
LookupResult OpLookup(S, Name, Loc, Sema::LookupOrdinaryName);
S.LookupQualifiedName(OpLookup, ClassDecl, false);
// Prior to C++11, filter out any result that isn't a copy/move-assignment
// operator.
if (!S.getLangOpts().CPlusPlus11) {
LookupResult::Filter F = OpLookup.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D))
if (Method->isCopyAssignmentOperator() ||
(!Copying && Method->isMoveAssignmentOperator()))
continue;
F.erase();
}
F.done();
}
// Suppress the protected check (C++ [class.protected]) for each of the
// assignment operators we found. This strange dance is required when
// we're assigning via a base class's copy-assignment operator. To
// ensure that we're getting the right base class subobject (without
// ambiguities), we need to cast "this" to that subobject type; to
// ensure that we don't go through the virtual call mechanism, we need
// to qualify the operator= name with the base class (see below). However,
// this means that if the base class has a protected copy assignment
// operator, the protected member access check will fail. So, we
// rewrite "protected" access to "public" access in this case, since we
// know by construction that we're calling from a derived class.
if (CopyingBaseSubobject) {
for (LookupResult::iterator L = OpLookup.begin(), LEnd = OpLookup.end();
L != LEnd; ++L) {
if (L.getAccess() == AS_protected)
L.setAccess(AS_public);
}
}
// Create the nested-name-specifier that will be used to qualify the
// reference to operator=; this is required to suppress the virtual
// call mechanism.
CXXScopeSpec SS;
const Type *CanonicalT = S.Context.getCanonicalType(T.getTypePtr());
SS.MakeTrivial(S.Context,
NestedNameSpecifier::Create(S.Context, nullptr, false,
CanonicalT),
Loc);
// Create the reference to operator=.
ExprResult OpEqualRef
= S.BuildMemberReferenceExpr(To.build(S, Loc), T, Loc, /*isArrow=*/false,
SS, /*TemplateKWLoc=*/SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
OpLookup,
/*TemplateArgs=*/nullptr, /*S*/nullptr,
/*SuppressQualifierCheck=*/true);
if (OpEqualRef.isInvalid())
return StmtError();
// Build the call to the assignment operator.
Expr *FromInst = From.build(S, Loc);
ExprResult Call = S.BuildCallToMemberFunction(/*Scope=*/nullptr,
OpEqualRef.getAs<Expr>(),
Loc, FromInst, Loc);
if (Call.isInvalid())
return StmtError();
// If we built a call to a trivial 'operator=' while copying an array,
// bail out. We'll replace the whole shebang with a memcpy.
CXXMemberCallExpr *CE = dyn_cast<CXXMemberCallExpr>(Call.get());
if (CE && CE->getMethodDecl()->isTrivial() && Depth)
return StmtResult((Stmt*)nullptr);
// Convert to an expression-statement, and clean up any produced
// temporaries.
return S.ActOnExprStmt(Call);
}
// - if the subobject is of scalar type, the built-in assignment
// operator is used.
const ConstantArrayType *ArrayTy = S.Context.getAsConstantArrayType(T);
if (!ArrayTy) {
ExprResult Assignment = S.CreateBuiltinBinOp(
Loc, BO_Assign, To.build(S, Loc), From.build(S, Loc));
if (Assignment.isInvalid())
return StmtError();
return S.ActOnExprStmt(Assignment);
}
// - if the subobject is an array, each element is assigned, in the
// manner appropriate to the element type;
// Construct a loop over the array bounds, e.g.,
//
// for (__SIZE_TYPE__ i0 = 0; i0 != array-size; ++i0)
//
// that will copy each of the array elements.
QualType SizeType = S.Context.getSizeType();
// Create the iteration variable.
IdentifierInfo *IterationVarName = nullptr;
{
SmallString<8> Str;
llvm::raw_svector_ostream OS(Str);
OS << "__i" << Depth;
IterationVarName = &S.Context.Idents.get(OS.str());
}
VarDecl *IterationVar = VarDecl::Create(S.Context, S.CurContext, Loc, Loc,
IterationVarName, SizeType,
S.Context.getTrivialTypeSourceInfo(SizeType, Loc),
SC_None);
// Initialize the iteration variable to zero.
llvm::APInt Zero(S.Context.getTypeSize(SizeType), 0);
IterationVar->setInit(IntegerLiteral::Create(S.Context, Zero, SizeType, Loc));
// Create a reference to the iteration variable.
RefBuilder IterationVarRef(IterationVar, SizeType);
LvalueConvBuilder IterationVarRefRVal(IterationVarRef);
// Create the DeclStmt that holds the iteration variable.
Stmt *InitStmt = new (S.Context) DeclStmt(DeclGroupRef(IterationVar),Loc,Loc);
// Subscript the "from" and "to" expressions with the iteration variable.
SubscriptBuilder FromIndexCopy(From, IterationVarRefRVal);
MoveCastBuilder FromIndexMove(FromIndexCopy);
const ExprBuilder *FromIndex;
if (Copying)
FromIndex = &FromIndexCopy;
else
FromIndex = &FromIndexMove;
SubscriptBuilder ToIndex(To, IterationVarRefRVal);
// Build the copy/move for an individual element of the array.
StmtResult Copy =
buildSingleCopyAssignRecursively(S, Loc, ArrayTy->getElementType(),
ToIndex, *FromIndex, CopyingBaseSubobject,
Copying, Depth + 1);
// Bail out if copying fails or if we determined that we should use memcpy.
if (Copy.isInvalid() || !Copy.get())
return Copy;
// Create the comparison against the array bound.
llvm::APInt Upper
= ArrayTy->getSize().zextOrTrunc(S.Context.getTypeSize(SizeType));
Expr *Comparison
= new (S.Context) BinaryOperator(IterationVarRefRVal.build(S, Loc),
IntegerLiteral::Create(S.Context, Upper, SizeType, Loc),
BO_NE, S.Context.BoolTy,
VK_RValue, OK_Ordinary, Loc, FPOptions());
// Create the pre-increment of the iteration variable.
Expr *Increment
= new (S.Context) UnaryOperator(IterationVarRef.build(S, Loc), UO_PreInc,
SizeType, VK_LValue, OK_Ordinary, Loc);
// Construct the loop that copies all elements of this array.
return S.ActOnForStmt(
Loc, Loc, InitStmt,
S.ActOnCondition(nullptr, Loc, Comparison, Sema::ConditionKind::Boolean),
S.MakeFullDiscardedValueExpr(Increment), Loc, Copy.get());
}
static StmtResult
buildSingleCopyAssign(Sema &S, SourceLocation Loc, QualType T,
const ExprBuilder &To, const ExprBuilder &From,
bool CopyingBaseSubobject, bool Copying) {
// Maybe we should use a memcpy?
if (T->isArrayType() && !T.isConstQualified() && !T.isVolatileQualified() &&
T.isTriviallyCopyableType(S.Context))
return buildMemcpyForAssignmentOp(S, Loc, T, To, From);
StmtResult Result(buildSingleCopyAssignRecursively(S, Loc, T, To, From,
CopyingBaseSubobject,
Copying, 0));
// If we ended up picking a trivial assignment operator for an array of a
// non-trivially-copyable class type, just emit a memcpy.
if (!Result.isInvalid() && !Result.get())
return buildMemcpyForAssignmentOp(S, Loc, T, To, From);
return Result;
}
CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
// Note: The following rules are largely analogous to the copy
// constructor rules. Note that virtual bases are not taken into account
// for determining the argument type of the operator. Note also that
// operators taking an object instead of a reference are allowed.
assert(ClassDecl->needsImplicitCopyAssignment());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXCopyAssignment);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
QualType ArgType = Context.getTypeDeclType(ClassDecl);
QualType RetType = Context.getLValueReferenceType(ArgType);
bool Const = ClassDecl->implicitCopyAssignmentHasConstParam();
if (Const)
ArgType = ArgType.withConst();
ArgType = Context.getLValueReferenceType(ArgType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXCopyAssignment,
Const);
// An implicitly-declared copy assignment operator is an inline public
// member of its class.
DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXMethodDecl *CopyAssignment =
CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, QualType(),
/*TInfo=*/nullptr, /*StorageClass=*/SC_None,
/*isInline=*/true, Constexpr, SourceLocation());
CopyAssignment->setAccess(AS_public);
CopyAssignment->setDefaulted();
CopyAssignment->setImplicit();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXCopyAssignment,
CopyAssignment,
/* ConstRHS */ Const,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, CopyAssignment);
CopyAssignment->setType(Context.getFunctionType(RetType, ArgType, EPI));
// Add the parameter to the operator.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, CopyAssignment,
ClassLoc, ClassLoc,
/*Id=*/nullptr, ArgType,
/*TInfo=*/nullptr, SC_None,
nullptr);
CopyAssignment->setParams(FromParam);
CopyAssignment->setTrivial(
ClassDecl->needsOverloadResolutionForCopyAssignment()
? SpecialMemberIsTrivial(CopyAssignment, CXXCopyAssignment)
: ClassDecl->hasTrivialCopyAssignment());
// Note that we have added this copy-assignment operator.
++ASTContext::NumImplicitCopyAssignmentOperatorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, CopyAssignment);
if (ShouldDeleteSpecialMember(CopyAssignment, CXXCopyAssignment))
SetDeclDeleted(CopyAssignment, ClassLoc);
if (S)
PushOnScopeChains(CopyAssignment, S, false);
ClassDecl->addDecl(CopyAssignment);
return CopyAssignment;
}
/// Diagnose an implicit copy operation for a class which is odr-used, but
/// which is deprecated because the class has a user-declared copy constructor,
/// copy assignment operator, or destructor.
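///
/// For example: given 'struct S { ~S(); };', assigning one S to another uses
/// the implicit copy assignment operator, which is deprecated because S has
/// a user-declared destructor.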
static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) {
assert(CopyOp->isImplicit());
CXXRecordDecl *RD = CopyOp->getParent();
CXXMethodDecl *UserDeclaredOperation = nullptr;
// In Microsoft mode, assignment operations don't affect constructors and
// vice versa.
if (RD->hasUserDeclaredDestructor()) {
UserDeclaredOperation = RD->getDestructor();
} else if (!isa<CXXConstructorDecl>(CopyOp) &&
RD->hasUserDeclaredCopyConstructor() &&
!S.getLangOpts().MSVCCompat) {
// Find any user-declared copy constructor.
for (auto *I : RD->ctors()) {
if (I->isCopyConstructor()) {
UserDeclaredOperation = I;
break;
}
}
assert(UserDeclaredOperation);
} else if (isa<CXXConstructorDecl>(CopyOp) &&
RD->hasUserDeclaredCopyAssignment() &&
!S.getLangOpts().MSVCCompat) {
// Find any user-declared copy assignment operator.
for (auto *I : RD->methods()) {
if (I->isCopyAssignmentOperator()) {
UserDeclaredOperation = I;
break;
}
}
assert(UserDeclaredOperation);
}
if (UserDeclaredOperation) {
S.Diag(UserDeclaredOperation->getLocation(),
diag::warn_deprecated_copy_operation)
<< RD << /*copy assignment*/!isa<CXXConstructorDecl>(CopyOp)
<< /*destructor*/isa<CXXDestructorDecl>(UserDeclaredOperation);
}
}
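// An example (illustrative, not from the original source) that reaches the
// warning above:
//   struct A { ~A(); };        // user-declared destructor
//   void f(A a) { A b = a; }   // odr-uses A's implicit copy constructor,
//                              // which C++11 deprecates for such classes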
void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
CXXMethodDecl *CopyAssignOperator) {
assert((CopyAssignOperator->isDefaulted() &&
CopyAssignOperator->isOverloadedOperator() &&
CopyAssignOperator->getOverloadedOperator() == OO_Equal &&
!CopyAssignOperator->doesThisDeclarationHaveABody() &&
!CopyAssignOperator->isDeleted()) &&
"DefineImplicitCopyAssignment called for wrong function");
if (CopyAssignOperator->willHaveBody() || CopyAssignOperator->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = CopyAssignOperator->getParent();
if (ClassDecl->isInvalidDecl()) {
CopyAssignOperator->setInvalidDecl();
return;
}
SynthesizedFunctionScope Scope(*this, CopyAssignOperator);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
CopyAssignOperator->getType()->castAs<FunctionProtoType>());
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
// C++11 [class.copy]p18:
// The [definition of an implicitly declared copy assignment operator] is
// deprecated if the class has a user-declared copy constructor or a
// user-declared destructor.
if (getLangOpts().CPlusPlus11 && CopyAssignOperator->isImplicit())
diagnoseDeprecatedCopyOperation(*this, CopyAssignOperator);
// C++0x [class.copy]p30:
// The implicitly-defined or explicitly-defaulted copy assignment operator
// for a non-union class X performs memberwise copy assignment of its
// subobjects. The direct base classes of X are assigned first, in the
// order of their declaration in the base-specifier-list, and then the
// immediate non-static data members of X are assigned, in the order in
// which they were declared in the class definition.
// The statements that form the synthesized function body.
SmallVector<Stmt*, 8> Statements;
// The parameter for the "other" object, which we are copying from.
ParmVarDecl *Other = CopyAssignOperator->getParamDecl(0);
Qualifiers OtherQuals = Other->getType().getQualifiers();
QualType OtherRefType = Other->getType();
if (const LValueReferenceType *OtherRef
= OtherRefType->getAs<LValueReferenceType>()) {
OtherRefType = OtherRef->getPointeeType();
OtherQuals = OtherRefType.getQualifiers();
}
// Our location for everything implicitly-generated.
SourceLocation Loc = CopyAssignOperator->getLocEnd().isValid()
? CopyAssignOperator->getLocEnd()
: CopyAssignOperator->getLocation();
// Builds a DeclRefExpr for the "other" object.
RefBuilder OtherRef(Other, OtherRefType);
// Builds the "this" pointer.
ThisBuilder This;
// Assign base classes.
bool Invalid = false;
for (auto &Base : ClassDecl->bases()) {
// Form the assignment:
// static_cast<Base*>(this)->Base::operator=(static_cast<Base&>(other));
QualType BaseType = Base.getType().getUnqualifiedType();
if (!BaseType->isRecordType()) {
Invalid = true;
continue;
}
CXXCastPath BasePath;
BasePath.push_back(&Base);
// Construct the "from" expression, which is an implicit cast to the
// appropriately-qualified base type.
CastBuilder From(OtherRef, Context.getQualifiedType(BaseType, OtherQuals),
VK_LValue, BasePath);
// Dereference "this".
DerefBuilder DerefThis(This);
CastBuilder To(DerefThis,
Context.getCVRQualifiedType(
BaseType, CopyAssignOperator->getTypeQualifiers()),
VK_LValue, BasePath);
// Build the copy.
StmtResult Copy = buildSingleCopyAssign(*this, Loc, BaseType,
To, From,
/*CopyingBaseSubobject=*/true,
/*Copying=*/true);
if (Copy.isInvalid()) {
CopyAssignOperator->setInvalidDecl();
return;
}
// Success! Record the copy.
Statements.push_back(Copy.getAs<Expr>());
}
// Assign non-static members.
for (auto *Field : ClassDecl->fields()) {
// FIXME: We should form some kind of AST representation for the implied
// memcpy in a union copy operation.
if (Field->isUnnamedBitfield() || Field->getParent()->isUnion())
continue;
if (Field->isInvalidDecl()) {
Invalid = true;
continue;
}
// Check for members of reference type; we can't copy those.
if (Field->getType()->isReferenceType()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Check for members of const-qualified, non-class type.
QualType BaseType = Context.getBaseElementType(Field->getType());
if (!BaseType->getAs<RecordType>() && BaseType.isConstQualified()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Suppress assigning zero-width bitfields.
if (Field->isBitField() && Field->getBitWidthValue(Context) == 0)
continue;
QualType FieldType = Field->getType().getNonReferenceType();
if (FieldType->isIncompleteArrayType()) {
assert(ClassDecl->hasFlexibleArrayMember() &&
"Incomplete array type is not valid");
continue;
}
// Build references to the field in the object we're copying from and to.
CXXScopeSpec SS; // Intentionally empty
LookupResult MemberLookup(*this, Field->getDeclName(), Loc,
LookupMemberName);
MemberLookup.addDecl(Field);
MemberLookup.resolveKind();
MemberBuilder From(OtherRef, OtherRefType, /*IsArrow=*/false, MemberLookup);
MemberBuilder To(This, getCurrentThisType(), /*IsArrow=*/true, MemberLookup);
// Build the copy of this field.
StmtResult Copy = buildSingleCopyAssign(*this, Loc, FieldType,
To, From,
/*CopyingBaseSubobject=*/false,
/*Copying=*/true);
if (Copy.isInvalid()) {
CopyAssignOperator->setInvalidDecl();
return;
}
// Success! Record the copy.
Statements.push_back(Copy.getAs<Stmt>());
}
if (!Invalid) {
// Add a "return *this;"
ExprResult ThisObj = CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc));
StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
if (Return.isInvalid())
Invalid = true;
else
Statements.push_back(Return.getAs<Stmt>());
}
if (Invalid) {
CopyAssignOperator->setInvalidDecl();
return;
}
StmtResult Body;
{
CompoundScopeRAII CompoundScope(*this);
Body = ActOnCompoundStmt(Loc, Loc, Statements,
/*isStmtExpr=*/false);
assert(!Body.isInvalid() && "Compound statement creation cannot fail");
}
CopyAssignOperator->setBody(Body.getAs<Stmt>());
CopyAssignOperator->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(CopyAssignOperator);
}
}
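// As a sketch (not from the original source), for
//   struct B { B &operator=(const B &); };
//   struct D : B { int n; };
// the body synthesized above is roughly
//   D &D::operator=(const D &other) {
//     static_cast<B *>(this)->B::operator=(static_cast<const B &>(other));
//     n = other.n;
//     return *this;
//   }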
CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) {
assert(ClassDecl->needsImplicitMoveAssignment());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXMoveAssignment);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
// Note: The following rules are largely analogous to the move
// constructor rules.
QualType ArgType = Context.getTypeDeclType(ClassDecl);
QualType RetType = Context.getLValueReferenceType(ArgType);
ArgType = Context.getRValueReferenceType(ArgType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXMoveAssignment,
false);
// An implicitly-declared move assignment operator is an inline public
// member of its class.
DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
CXXMethodDecl *MoveAssignment =
CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, QualType(),
/*TInfo=*/nullptr, /*StorageClass=*/SC_None,
/*isInline=*/true, Constexpr, SourceLocation());
MoveAssignment->setAccess(AS_public);
MoveAssignment->setDefaulted();
MoveAssignment->setImplicit();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXMoveAssignment,
MoveAssignment,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, MoveAssignment);
MoveAssignment->setType(Context.getFunctionType(RetType, ArgType, EPI));
// Add the parameter to the operator.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, MoveAssignment,
ClassLoc, ClassLoc,
/*Id=*/nullptr, ArgType,
/*TInfo=*/nullptr, SC_None,
nullptr);
MoveAssignment->setParams(FromParam);
MoveAssignment->setTrivial(
ClassDecl->needsOverloadResolutionForMoveAssignment()
? SpecialMemberIsTrivial(MoveAssignment, CXXMoveAssignment)
: ClassDecl->hasTrivialMoveAssignment());
// Note that we have added this move-assignment operator.
++ASTContext::NumImplicitMoveAssignmentOperatorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, MoveAssignment);
if (ShouldDeleteSpecialMember(MoveAssignment, CXXMoveAssignment)) {
ClassDecl->setImplicitMoveAssignmentIsDeleted();
SetDeclDeleted(MoveAssignment, ClassLoc);
}
if (S)
PushOnScopeChains(MoveAssignment, S, false);
ClassDecl->addDecl(MoveAssignment);
return MoveAssignment;
}
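// For illustration (sketch): a class with no user-declared copy operations,
// move operations, or destructor, such as
//   struct M { int *p; };
// receives the implicit declaration
//   M &M::operator=(M &&);   // inline public member, as noted above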
/// Check if we're implicitly defining a move assignment operator for a class
/// with virtual bases. Such a move assignment might move-assign the virtual
/// base multiple times.
static void checkMoveAssignmentForRepeatedMove(Sema &S, CXXRecordDecl *Class,
SourceLocation CurrentLocation) {
assert(!Class->isDependentContext() && "should not define dependent move");
// Only a virtual base could get implicitly move-assigned multiple times.
// Only a non-trivial move assignment can observe this. We only want to
// diagnose if we implicitly define an assignment operator that assigns
// two base classes, both of which move-assign the same virtual base.
if (Class->getNumVBases() == 0 || Class->hasTrivialMoveAssignment() ||
Class->getNumBases() < 2)
return;
llvm::SmallVector<CXXBaseSpecifier *, 16> Worklist;
typedef llvm::DenseMap<CXXRecordDecl*, CXXBaseSpecifier*> VBaseMap;
VBaseMap VBases;
for (auto &BI : Class->bases()) {
Worklist.push_back(&BI);
while (!Worklist.empty()) {
CXXBaseSpecifier *BaseSpec = Worklist.pop_back_val();
CXXRecordDecl *Base = BaseSpec->getType()->getAsCXXRecordDecl();
// If the base has no non-trivial move assignment operators,
// we don't care about moves from it.
if (!Base->hasNonTrivialMoveAssignment())
continue;
// If there's nothing virtual here, skip it.
if (!BaseSpec->isVirtual() && !Base->getNumVBases())
continue;
// If we're not actually going to call a move assignment for this base,
// or the selected move assignment is trivial, skip it.
Sema::SpecialMemberOverloadResult SMOR =
S.LookupSpecialMember(Base, Sema::CXXMoveAssignment,
/*ConstArg*/false, /*VolatileArg*/false,
/*RValueThis*/true, /*ConstThis*/false,
/*VolatileThis*/false);
if (!SMOR.getMethod() || SMOR.getMethod()->isTrivial() ||
!SMOR.getMethod()->isMoveAssignmentOperator())
continue;
if (BaseSpec->isVirtual()) {
// We're going to move-assign this virtual base, and its move
// assignment operator is not trivial. If this can happen for
// multiple distinct direct bases of Class, diagnose it. (If it
// only happens in one base, we'll diagnose it when synthesizing
// that base class's move assignment operator.)
CXXBaseSpecifier *&Existing =
VBases.insert(std::make_pair(Base->getCanonicalDecl(), &BI))
.first->second;
if (Existing && Existing != &BI) {
S.Diag(CurrentLocation, diag::warn_vbase_moved_multiple_times)
<< Class << Base;
S.Diag(Existing->getLocStart(), diag::note_vbase_moved_here)
<< (Base->getCanonicalDecl() ==
Existing->getType()->getAsCXXRecordDecl()->getCanonicalDecl())
<< Base << Existing->getType() << Existing->getSourceRange();
S.Diag(BI.getLocStart(), diag::note_vbase_moved_here)
<< (Base->getCanonicalDecl() ==
BI.getType()->getAsCXXRecordDecl()->getCanonicalDecl())
<< Base << BI.getType() << BaseSpec->getSourceRange();
// Only diagnose each vbase once.
Existing = nullptr;
}
} else {
// Only walk over bases that have defaulted move assignment operators.
// We assume that any user-provided move assignment operator handles
// the multiple-moves-of-vbase case itself somehow.
if (!SMOR.getMethod()->isDefaulted())
continue;
// We're going to move the base classes of Base. Add them to the list.
for (auto &BI : Base->bases())
Worklist.push_back(&BI);
}
}
}
}
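// A hypothetical example that triggers the diagnostic above:
//   struct V { V &operator=(V &&); };   // non-trivial move assignment
//   struct B1 : virtual V {};
//   struct B2 : virtual V {};
//   struct D : B1, B2 {};
// Defining D's implicit move assignment warns that the virtual base 'V'
// may be move-assigned multiple times, once via each of B1 and B2.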
void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
CXXMethodDecl *MoveAssignOperator) {
assert((MoveAssignOperator->isDefaulted() &&
MoveAssignOperator->isOverloadedOperator() &&
MoveAssignOperator->getOverloadedOperator() == OO_Equal &&
!MoveAssignOperator->doesThisDeclarationHaveABody() &&
!MoveAssignOperator->isDeleted()) &&
"DefineImplicitMoveAssignment called for wrong function");
if (MoveAssignOperator->willHaveBody() || MoveAssignOperator->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = MoveAssignOperator->getParent();
if (ClassDecl->isInvalidDecl()) {
MoveAssignOperator->setInvalidDecl();
return;
}
// C++0x [class.copy]p28:
// The implicitly-defined or explicitly-defaulted move assignment operator
// for a non-union class X performs memberwise move assignment of its
// subobjects. The direct base
// classes of X are assigned first, in the order of their declaration in the
// base-specifier-list, and then the immediate non-static data members of X
// are assigned, in the order in which they were declared in the class
// definition.
// Issue a warning if our implicit move assignment operator will move
// from a virtual base more than once.
checkMoveAssignmentForRepeatedMove(*this, ClassDecl, CurrentLocation);
SynthesizedFunctionScope Scope(*this, MoveAssignOperator);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
MoveAssignOperator->getType()->castAs<FunctionProtoType>());
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
// The statements that form the synthesized function body.
SmallVector<Stmt*, 8> Statements;
// The parameter for the "other" object, which we are moving from.
ParmVarDecl *Other = MoveAssignOperator->getParamDecl(0);
QualType OtherRefType = Other->getType()->
getAs<RValueReferenceType>()->getPointeeType();
assert(!OtherRefType.getQualifiers() &&
"Bad argument type of defaulted move assignment");
// Our location for everything implicitly-generated.
SourceLocation Loc = MoveAssignOperator->getLocEnd().isValid()
? MoveAssignOperator->getLocEnd()
: MoveAssignOperator->getLocation();
// Builds a reference to the "other" object.
RefBuilder OtherRef(Other, OtherRefType);
// Cast to rvalue.
MoveCastBuilder MoveOther(OtherRef);
// Builds the "this" pointer.
ThisBuilder This;
// Assign base classes.
bool Invalid = false;
for (auto &Base : ClassDecl->bases()) {
// C++11 [class.copy]p28:
// It is unspecified whether subobjects representing virtual base classes
// are assigned more than once by the implicitly-defined copy assignment
// operator.
// FIXME: Do not assign to a vbase that will be assigned by some other base
// class. For a move-assignment, this can result in the vbase being moved
// multiple times.
// Form the assignment:
// static_cast<Base*>(this)->Base::operator=(static_cast<Base&&>(other));
QualType BaseType = Base.getType().getUnqualifiedType();
if (!BaseType->isRecordType()) {
Invalid = true;
continue;
}
CXXCastPath BasePath;
BasePath.push_back(&Base);
// Construct the "from" expression, which is an implicit cast to the
// appropriately-qualified base type.
CastBuilder From(OtherRef, BaseType, VK_XValue, BasePath);
// Dereference "this".
DerefBuilder DerefThis(This);
// Implicitly cast "this" to the appropriately-qualified base type.
CastBuilder To(DerefThis,
Context.getCVRQualifiedType(
BaseType, MoveAssignOperator->getTypeQualifiers()),
VK_LValue, BasePath);
// Build the move.
StmtResult Move = buildSingleCopyAssign(*this, Loc, BaseType,
To, From,
/*CopyingBaseSubobject=*/true,
/*Copying=*/false);
if (Move.isInvalid()) {
MoveAssignOperator->setInvalidDecl();
return;
}
// Success! Record the move.
Statements.push_back(Move.getAs<Expr>());
}
// Assign non-static members.
for (auto *Field : ClassDecl->fields()) {
// FIXME: We should form some kind of AST representation for the implied
// memcpy in a union copy operation.
if (Field->isUnnamedBitfield() || Field->getParent()->isUnion())
continue;
if (Field->isInvalidDecl()) {
Invalid = true;
continue;
}
// Check for members of reference type; we can't move those.
if (Field->getType()->isReferenceType()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Check for members of const-qualified, non-class type.
QualType BaseType = Context.getBaseElementType(Field->getType());
if (!BaseType->getAs<RecordType>() && BaseType.isConstQualified()) {
Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
<< Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName();
Diag(Field->getLocation(), diag::note_declared_at);
Invalid = true;
continue;
}
// Suppress assigning zero-width bitfields.
if (Field->isBitField() && Field->getBitWidthValue(Context) == 0)
continue;
QualType FieldType = Field->getType().getNonReferenceType();
if (FieldType->isIncompleteArrayType()) {
assert(ClassDecl->hasFlexibleArrayMember() &&
"Incomplete array type is not valid");
continue;
}
// Build references to the field in the object we're copying from and to.
LookupResult MemberLookup(*this, Field->getDeclName(), Loc,
LookupMemberName);
MemberLookup.addDecl(Field);
MemberLookup.resolveKind();
MemberBuilder From(MoveOther, OtherRefType,
/*IsArrow=*/false, MemberLookup);
MemberBuilder To(This, getCurrentThisType(),
/*IsArrow=*/true, MemberLookup);
assert(!From.build(*this, Loc)->isLValue() && // could be xvalue or prvalue
"Member reference with rvalue base must be rvalue except for reference "
"members, which aren't allowed for move assignment.");
// Build the move of this field.
StmtResult Move = buildSingleCopyAssign(*this, Loc, FieldType,
To, From,
/*CopyingBaseSubobject=*/false,
/*Copying=*/false);
if (Move.isInvalid()) {
MoveAssignOperator->setInvalidDecl();
return;
}
// Success! Record the move.
Statements.push_back(Move.getAs<Stmt>());
}
if (!Invalid) {
// Add a "return *this;"
ExprResult ThisObj =
CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc));
StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
if (Return.isInvalid())
Invalid = true;
else
Statements.push_back(Return.getAs<Stmt>());
}
if (Invalid) {
MoveAssignOperator->setInvalidDecl();
return;
}
StmtResult Body;
{
CompoundScopeRAII CompoundScope(*this);
Body = ActOnCompoundStmt(Loc, Loc, Statements,
/*isStmtExpr=*/false);
assert(!Body.isInvalid() && "Compound statement creation cannot fail");
}
MoveAssignOperator->setBody(Body.getAs<Stmt>());
MoveAssignOperator->markUsed(Context);
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(MoveAssignOperator);
}
}
CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
CXXRecordDecl *ClassDecl) {
// C++ [class.copy]p4:
// If the class definition does not explicitly declare a copy
// constructor, one is declared implicitly.
assert(ClassDecl->needsImplicitCopyConstructor());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXCopyConstructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
QualType ClassType = Context.getTypeDeclType(ClassDecl);
QualType ArgType = ClassType;
bool Const = ClassDecl->implicitCopyConstructorHasConstParam();
if (Const)
ArgType = ArgType.withConst();
ArgType = Context.getLValueReferenceType(ArgType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXCopyConstructor,
Const);
DeclarationName Name
= Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(ClassType));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
// An implicitly-declared copy constructor is an inline public
// member of its class.
CXXConstructorDecl *CopyConstructor = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/nullptr,
/*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true,
Constexpr);
CopyConstructor->setAccess(AS_public);
CopyConstructor->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXCopyConstructor,
CopyConstructor,
/* ConstRHS */ Const,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, CopyConstructor);
CopyConstructor->setType(
Context.getFunctionType(Context.VoidTy, ArgType, EPI));
// Add the parameter to the constructor.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, CopyConstructor,
ClassLoc, ClassLoc,
/*IdentifierInfo=*/nullptr,
ArgType, /*TInfo=*/nullptr,
SC_None, nullptr);
CopyConstructor->setParams(FromParam);
CopyConstructor->setTrivial(
ClassDecl->needsOverloadResolutionForCopyConstructor()
? SpecialMemberIsTrivial(CopyConstructor, CXXCopyConstructor)
: ClassDecl->hasTrivialCopyConstructor());
// Note that we have declared this constructor.
++ASTContext::NumImplicitCopyConstructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, CopyConstructor);
- if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor))
+ if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor)) {
+ ClassDecl->setImplicitCopyConstructorIsDeleted();
SetDeclDeleted(CopyConstructor, ClassLoc);
+ }
if (S)
PushOnScopeChains(CopyConstructor, S, false);
ClassDecl->addDecl(CopyConstructor);
return CopyConstructor;
}
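// For illustration (hypothetical user code): a user-declared move
// constructor causes the copy constructor declared here to be deleted,
// which the new call to setImplicitCopyConstructorIsDeleted() records on
// the class:
//   struct M { M(M &&); };
//   void f(M &m) { M c(m); }   // error: call to implicitly-deleted copy
//                              // constructor of 'M'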
void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *CopyConstructor) {
assert((CopyConstructor->isDefaulted() &&
CopyConstructor->isCopyConstructor() &&
!CopyConstructor->doesThisDeclarationHaveABody() &&
!CopyConstructor->isDeleted()) &&
"DefineImplicitCopyConstructor - call it for implicit copy ctor");
if (CopyConstructor->willHaveBody() || CopyConstructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = CopyConstructor->getParent();
assert(ClassDecl && "DefineImplicitCopyConstructor - invalid constructor");
SynthesizedFunctionScope Scope(*this, CopyConstructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
CopyConstructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
// C++11 [class.copy]p7:
// The [definition of an implicitly declared copy constructor] is
// deprecated if the class has a user-declared copy assignment operator
// or a user-declared destructor.
if (getLangOpts().CPlusPlus11 && CopyConstructor->isImplicit())
diagnoseDeprecatedCopyOperation(*this, CopyConstructor);
if (SetCtorInitializers(CopyConstructor, /*AnyErrors=*/false)) {
CopyConstructor->setInvalidDecl();
} else {
SourceLocation Loc = CopyConstructor->getLocEnd().isValid()
? CopyConstructor->getLocEnd()
: CopyConstructor->getLocation();
Sema::CompoundScopeRAII CompoundScope(*this);
CopyConstructor->setBody(
ActOnCompoundStmt(Loc, Loc, None, /*isStmtExpr=*/false).getAs<Stmt>());
CopyConstructor->markUsed(Context);
}
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(CopyConstructor);
}
}
CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
CXXRecordDecl *ClassDecl) {
assert(ClassDecl->needsImplicitMoveConstructor());
DeclaringSpecialMember DSM(*this, ClassDecl, CXXMoveConstructor);
if (DSM.isAlreadyBeingDeclared())
return nullptr;
QualType ClassType = Context.getTypeDeclType(ClassDecl);
QualType ArgType = Context.getRValueReferenceType(ClassType);
bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
CXXMoveConstructor,
false);
DeclarationName Name
= Context.DeclarationNames.getCXXConstructorName(
Context.getCanonicalType(ClassType));
SourceLocation ClassLoc = ClassDecl->getLocation();
DeclarationNameInfo NameInfo(Name, ClassLoc);
// C++11 [class.copy]p11:
// An implicitly-declared copy/move constructor is an inline public
// member of its class.
CXXConstructorDecl *MoveConstructor = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/nullptr,
/*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true,
Constexpr);
MoveConstructor->setAccess(AS_public);
MoveConstructor->setDefaulted();
if (getLangOpts().CUDA) {
inferCUDATargetForImplicitSpecialMember(ClassDecl, CXXMoveConstructor,
MoveConstructor,
/* ConstRHS */ false,
/* Diagnose */ false);
}
// Build an exception specification pointing back at this member.
FunctionProtoType::ExtProtoInfo EPI =
getImplicitMethodEPI(*this, MoveConstructor);
MoveConstructor->setType(
Context.getFunctionType(Context.VoidTy, ArgType, EPI));
// Add the parameter to the constructor.
ParmVarDecl *FromParam = ParmVarDecl::Create(Context, MoveConstructor,
ClassLoc, ClassLoc,
/*IdentifierInfo=*/nullptr,
ArgType, /*TInfo=*/nullptr,
SC_None, nullptr);
MoveConstructor->setParams(FromParam);
MoveConstructor->setTrivial(
ClassDecl->needsOverloadResolutionForMoveConstructor()
? SpecialMemberIsTrivial(MoveConstructor, CXXMoveConstructor)
: ClassDecl->hasTrivialMoveConstructor());
// Note that we have declared this constructor.
++ASTContext::NumImplicitMoveConstructorsDeclared;
Scope *S = getScopeForContext(ClassDecl);
CheckImplicitSpecialMemberDeclaration(S, MoveConstructor);
if (ShouldDeleteSpecialMember(MoveConstructor, CXXMoveConstructor)) {
ClassDecl->setImplicitMoveConstructorIsDeleted();
SetDeclDeleted(MoveConstructor, ClassLoc);
}
if (S)
PushOnScopeChains(MoveConstructor, S, false);
ClassDecl->addDecl(MoveConstructor);
return MoveConstructor;
}
void Sema::DefineImplicitMoveConstructor(SourceLocation CurrentLocation,
CXXConstructorDecl *MoveConstructor) {
assert((MoveConstructor->isDefaulted() &&
MoveConstructor->isMoveConstructor() &&
!MoveConstructor->doesThisDeclarationHaveABody() &&
!MoveConstructor->isDeleted()) &&
"DefineImplicitMoveConstructor - call it for implicit move ctor");
if (MoveConstructor->willHaveBody() || MoveConstructor->isInvalidDecl())
return;
CXXRecordDecl *ClassDecl = MoveConstructor->getParent();
assert(ClassDecl && "DefineImplicitMoveConstructor - invalid constructor");
SynthesizedFunctionScope Scope(*this, MoveConstructor);
// The exception specification is needed because we are defining the
// function.
ResolveExceptionSpec(CurrentLocation,
MoveConstructor->getType()->castAs<FunctionProtoType>());
MarkVTableUsed(CurrentLocation, ClassDecl);
// Add a context note for diagnostics produced after this point.
Scope.addContextNote(CurrentLocation);
if (SetCtorInitializers(MoveConstructor, /*AnyErrors=*/false)) {
MoveConstructor->setInvalidDecl();
} else {
SourceLocation Loc = MoveConstructor->getLocEnd().isValid()
? MoveConstructor->getLocEnd()
: MoveConstructor->getLocation();
Sema::CompoundScopeRAII CompoundScope(*this);
MoveConstructor->setBody(ActOnCompoundStmt(
Loc, Loc, None, /*isStmtExpr=*/ false).getAs<Stmt>());
MoveConstructor->markUsed(Context);
}
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(MoveConstructor);
}
}
bool Sema::isImplicitlyDeleted(FunctionDecl *FD) {
return FD->isDeleted() && FD->isDefaulted() && isa<CXXMethodDecl>(FD);
}
void Sema::DefineImplicitLambdaToFunctionPointerConversion(
SourceLocation CurrentLocation,
CXXConversionDecl *Conv) {
SynthesizedFunctionScope Scope(*this, Conv);
CXXRecordDecl *Lambda = Conv->getParent();
CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
// If we are defining a specialization of a conversion to function-ptr,
// cache the deduced template arguments for this specialization so that
// we can use them to retrieve the corresponding call-operator and
// static-invoker.
const TemplateArgumentList *DeducedTemplateArgs = nullptr;
// Retrieve the corresponding call-operator specialization.
if (Lambda->isGenericLambda()) {
assert(Conv->isFunctionTemplateSpecialization());
FunctionTemplateDecl *CallOpTemplate =
CallOp->getDescribedFunctionTemplate();
DeducedTemplateArgs = Conv->getTemplateSpecializationArgs();
void *InsertPos = nullptr;
FunctionDecl *CallOpSpec = CallOpTemplate->findSpecialization(
DeducedTemplateArgs->asArray(),
InsertPos);
assert(CallOpSpec &&
"Conversion operator must have a corresponding call operator");
CallOp = cast<CXXMethodDecl>(CallOpSpec);
}
// Mark the call operator referenced (and add to pending instantiations
// if necessary).
// For both the conversion and static-invoker template specializations
// we construct their bodies in this function, so no need to add them
// to the PendingInstantiations.
MarkFunctionReferenced(CurrentLocation, CallOp);
// Retrieve the static invoker...
CXXMethodDecl *Invoker = Lambda->getLambdaStaticInvoker();
// ... and get the corresponding specialization for a generic lambda.
if (Lambda->isGenericLambda()) {
assert(DeducedTemplateArgs &&
"Must have deduced template arguments from Conversion Operator");
FunctionTemplateDecl *InvokeTemplate =
Invoker->getDescribedFunctionTemplate();
void *InsertPos = nullptr;
FunctionDecl *InvokeSpec = InvokeTemplate->findSpecialization(
DeducedTemplateArgs->asArray(),
InsertPos);
assert(InvokeSpec &&
"Must have a corresponding static invoker specialization");
Invoker = cast<CXXMethodDecl>(InvokeSpec);
}
// Construct the body of the conversion function { return __invoke; }.
Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
VK_LValue, Conv->getLocation()).get();
assert(FunctionRef && "Can't refer to __invoke function?");
Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
Conv->setBody(new (Context) CompoundStmt(Context, Return,
Conv->getLocation(),
Conv->getLocation()));
Conv->markUsed(Context);
Conv->setReferenced();
// Fill in the __invoke function with a dummy implementation. IR generation
// will fill in the actual details.
Invoker->markUsed(Context);
Invoker->setReferenced();
Invoker->setBody(new (Context) CompoundStmt(Conv->getLocation()));
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Conv);
L->CompletedImplicitDefinition(Invoker);
}
}
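// Usage sketch (not from the original source):
//   auto l = [](int x) { return x + 1; };
//   int (*fp)(int) = l;   // calls the conversion function defined here;
//                         // its body is just 'return __invoke;', and the
//                         // static invoker's real body comes from IR gen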
void Sema::DefineImplicitLambdaToBlockPointerConversion(
SourceLocation CurrentLocation,
CXXConversionDecl *Conv)
{
assert(!Conv->getParent()->isGenericLambda());
SynthesizedFunctionScope Scope(*this, Conv);
// Copy-initialize the lambda object as needed to capture it.
Expr *This = ActOnCXXThis(CurrentLocation).get();
Expr *DerefThis = CreateBuiltinUnaryOp(CurrentLocation, UO_Deref, This).get();
ExprResult BuildBlock = BuildBlockForLambdaConversion(CurrentLocation,
Conv->getLocation(),
Conv, DerefThis);
// If we're not under ARC, make sure we still get the _Block_copy/autorelease
// behavior. Note that only the general conversion function does this
// (since it's unusable otherwise); in the case where we inline the
// block literal, it has block literal lifetime semantics.
if (!BuildBlock.isInvalid() && !getLangOpts().ObjCAutoRefCount)
BuildBlock = ImplicitCastExpr::Create(Context, BuildBlock.get()->getType(),
CK_CopyAndAutoreleaseBlockObject,
BuildBlock.get(), nullptr, VK_RValue);
if (BuildBlock.isInvalid()) {
Diag(CurrentLocation, diag::note_lambda_to_block_conv);
Conv->setInvalidDecl();
return;
}
// Create the return statement that returns the block from the conversion
// function.
StmtResult Return = BuildReturnStmt(Conv->getLocation(), BuildBlock.get());
if (Return.isInvalid()) {
Diag(CurrentLocation, diag::note_lambda_to_block_conv);
Conv->setInvalidDecl();
return;
}
// Set the body of the conversion function.
Stmt *ReturnS = Return.get();
Conv->setBody(new (Context) CompoundStmt(Context, ReturnS,
Conv->getLocation(),
Conv->getLocation()));
Conv->markUsed(Context);
// We're done; notify the mutation listener, if any.
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Conv);
}
}
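// Usage sketch (Objective-C++ with blocks enabled; illustrative only):
//   void (^blk)(void) = []{};   // the conversion defined here returns a
//                               // block that forwards to the lambda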
/// \brief Determine whether the given list of arguments contains exactly one
/// "real" (non-default) argument.
static bool hasOneRealArgument(MultiExprArg Args) {
switch (Args.size()) {
case 0:
return false;
default:
if (!Args[1]->isDefaultArgument())
return false;
// fall through
case 1:
return !Args[0]->isDefaultArgument();
}
return false;
}
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
NamedDecl *FoundDecl,
CXXConstructorDecl *Constructor,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
bool IsStdInitListInitialization,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
bool Elidable = false;
// C++0x [class.copy]p34:
// When certain criteria are met, an implementation is allowed to
// omit the copy/move construction of a class object, even if the
// copy/move constructor and/or destructor for the object have
// side effects. [...]
// - when a temporary class object that has not been bound to a
// reference (12.2) would be copied/moved to a class object
// with the same cv-unqualified type, the copy/move operation
// can be omitted by constructing the temporary object
// directly into the target of the omitted copy/move
if (ConstructKind == CXXConstructExpr::CK_Complete && Constructor &&
Constructor->isCopyOrMoveConstructor() && hasOneRealArgument(ExprArgs)) {
Expr *SubExpr = ExprArgs[0];
Elidable = SubExpr->isTemporaryObject(
Context, cast<CXXRecordDecl>(FoundDecl->getDeclContext()));
}
return BuildCXXConstructExpr(ConstructLoc, DeclInitType,
FoundDecl, Constructor,
Elidable, ExprArgs, HadMultipleCandidates,
IsListInitialization,
IsStdInitListInitialization, RequiresZeroInit,
ConstructKind, ParenRange);
}
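// For illustration (sketch): given
//   struct A { A(); A(const A &); };
//   A a = A();
// the copy from the temporary satisfies the check above, so the
// CXXConstructExpr is marked elidable and the temporary can be constructed
// directly into 'a'.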
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
NamedDecl *FoundDecl,
CXXConstructorDecl *Constructor,
bool Elidable,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
bool IsStdInitListInitialization,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(FoundDecl)) {
Constructor = findInheritingConstructor(ConstructLoc, Constructor, Shadow);
if (DiagnoseUseOfDecl(Constructor, ConstructLoc))
return ExprError();
}
return BuildCXXConstructExpr(
ConstructLoc, DeclInitType, Constructor, Elidable, ExprArgs,
HadMultipleCandidates, IsListInitialization, IsStdInitListInitialization,
RequiresZeroInit, ConstructKind, ParenRange);
}
/// BuildCXXConstructExpr - Creates a complete call to a constructor,
/// including handling of its default argument expressions.
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
CXXConstructorDecl *Constructor,
bool Elidable,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
bool IsStdInitListInitialization,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
assert(declaresSameEntity(
Constructor->getParent(),
DeclInitType->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) &&
"given constructor for wrong type");
MarkFunctionReferenced(ConstructLoc, Constructor);
if (getLangOpts().CUDA && !CheckCUDACall(ConstructLoc, Constructor))
return ExprError();
return CXXConstructExpr::Create(
Context, DeclInitType, ConstructLoc, Constructor, Elidable,
ExprArgs, HadMultipleCandidates, IsListInitialization,
IsStdInitListInitialization, RequiresZeroInit,
static_cast<CXXConstructExpr::ConstructionKind>(ConstructKind),
ParenRange);
}
ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
assert(Field->hasInClassInitializer());
// If we already have the in-class initializer nothing needs to be done.
if (Field->getInClassInitializer())
return CXXDefaultInitExpr::Create(Context, Loc, Field);
// If we might have already tried and failed to instantiate, don't try again.
if (Field->isInvalidDecl())
return ExprError();
// Maybe we haven't instantiated the in-class initializer. Go check the
// pattern FieldDecl to see if it has one.
CXXRecordDecl *ParentRD = cast<CXXRecordDecl>(Field->getParent());
if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) {
CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern();
DeclContext::lookup_result Lookup =
ClassPattern->lookup(Field->getDeclName());
// Lookup can return at most two results: the pattern for the field, or the
// injected class name of the parent record. No other member can have the
// same name as the field.
// In modules mode, lookup can return multiple results (coming from
// different modules).
assert((getLangOpts().Modules || (!Lookup.empty() && Lookup.size() <= 2)) &&
"more than two lookup results for field name");
FieldDecl *Pattern = dyn_cast<FieldDecl>(Lookup[0]);
if (!Pattern) {
assert(isa<CXXRecordDecl>(Lookup[0]) &&
"cannot have other non-field member with same name");
for (auto L : Lookup)
if (isa<FieldDecl>(L)) {
Pattern = cast<FieldDecl>(L);
break;
}
assert(Pattern && "We must have set the Pattern!");
}
if (InstantiateInClassInitializer(Loc, Field, Pattern,
getTemplateInstantiationArgs(Field))) {
// Don't diagnose this again.
Field->setInvalidDecl();
return ExprError();
}
return CXXDefaultInitExpr::Create(Context, Loc, Field);
}
// DR1351:
// If the brace-or-equal-initializer of a non-static data member
// invokes a defaulted default constructor of its class or of an
// enclosing class in a potentially evaluated subexpression, the
// program is ill-formed.
//
// This resolution is unworkable: the exception specification of the
// default constructor can be needed in an unevaluated context, in
// particular, in the operand of a noexcept-expression, and we can be
// unable to compute an exception specification for an enclosed class.
//
// Any attempt to resolve the exception specification of a defaulted default
// constructor before the initializer is lexically complete will ultimately
// come here at which point we can diagnose it.
RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext();
Diag(Loc, diag::err_in_class_initializer_not_yet_parsed)
<< OutermostClass << Field;
Diag(Field->getLocEnd(), diag::note_in_class_initializer_not_yet_parsed);
// Recover by marking the field invalid, unless we're in a SFINAE context.
if (!isSFINAEContext())
Field->setInvalidDecl();
return ExprError();
}
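// A sketch of the ill-formed case diagnosed above (hypothetical code):
//   struct S {
//     struct T { int n = 0; };          // NSDMI parsed at the end of 'S'
//     static_assert(noexcept(T()), ""); // needs T()'s exception spec, and
//   };                                  // hence n's initializer, too early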
void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) {
if (VD->isInvalidDecl()) return;
CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(Record->getDecl());
if (ClassDecl->isInvalidDecl()) return;
if (ClassDecl->hasIrrelevantDestructor()) return;
if (ClassDecl->isDependentContext()) return;
CXXDestructorDecl *Destructor = LookupDestructor(ClassDecl);
MarkFunctionReferenced(VD->getLocation(), Destructor);
CheckDestructorAccess(VD->getLocation(), Destructor,
PDiag(diag::err_access_dtor_var)
<< VD->getDeclName()
<< VD->getType());
DiagnoseUseOfDecl(Destructor, VD->getLocation());
if (Destructor->isTrivial()) return;
if (!VD->hasGlobalStorage()) return;
// Emit warning for non-trivial dtor in global scope (a real global,
// class-static, function-static).
Diag(VD->getLocation(), diag::warn_exit_time_destructor);
// TODO: this should be re-enabled for static locals by !CXAAtExit
if (!VD->isStaticLocal())
Diag(VD->getLocation(), diag::warn_global_destructor);
}
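// For illustration (hypothetical code): with the relevant warnings enabled,
//   struct D { ~D(); };   // non-trivial destructor
//   D d;                  // global: warns about an exit-time destructor
//                         // (and, per above, a global destructor)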
/// \brief Given a constructor and the set of arguments provided for the
/// constructor, convert the arguments and add any required default arguments
/// to form a proper call to this constructor.
///
/// \returns true if an error occurred, false otherwise.
bool
Sema::CompleteConstructorCall(CXXConstructorDecl *Constructor,
MultiExprArg ArgsPtr,
SourceLocation Loc,
SmallVectorImpl<Expr*> &ConvertedArgs,
bool AllowExplicit,
bool IsListInitialization) {
// FIXME: This duplicates a lot of code from Sema::ConvertArgumentsForCall.
unsigned NumArgs = ArgsPtr.size();
Expr **Args = ArgsPtr.data();
const FunctionProtoType *Proto
= Constructor->getType()->getAs<FunctionProtoType>();
assert(Proto && "Constructor without a prototype?");
unsigned NumParams = Proto->getNumParams();
// If too few arguments are available, we'll fill in the rest with defaults.
if (NumArgs < NumParams)
ConvertedArgs.reserve(NumParams);
else
ConvertedArgs.reserve(NumArgs);
VariadicCallType CallType =
Proto->isVariadic() ? VariadicConstructor : VariadicDoesNotApply;
SmallVector<Expr *, 8> AllArgs;
bool Invalid = GatherArgumentsForCall(Loc, Constructor,
Proto, 0,
llvm::makeArrayRef(Args, NumArgs),
AllArgs,
CallType, AllowExplicit,
IsListInitialization);
ConvertedArgs.append(AllArgs.begin(), AllArgs.end());
DiagnoseSentinelCalls(Constructor, Loc, AllArgs);
CheckConstructorCall(Constructor,
llvm::makeArrayRef(AllArgs.data(), AllArgs.size()),
Proto, Loc);
return Invalid;
}
static inline bool
CheckOperatorNewDeleteDeclarationScope(Sema &SemaRef,
const FunctionDecl *FnDecl) {
const DeclContext *DC = FnDecl->getDeclContext()->getRedeclContext();
if (isa<NamespaceDecl>(DC)) {
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_declared_in_namespace)
<< FnDecl->getDeclName();
}
if (isa<TranslationUnitDecl>(DC) &&
FnDecl->getStorageClass() == SC_Static) {
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_declared_static)
<< FnDecl->getDeclName();
}
return false;
}
static inline bool
CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
CanQualType ExpectedResultType,
CanQualType ExpectedFirstParamType,
unsigned DependentParamTypeDiag,
unsigned InvalidParamTypeDiag) {
QualType ResultType =
FnDecl->getType()->getAs<FunctionType>()->getReturnType();
// Check that the result type is not dependent.
if (ResultType->isDependentType())
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_dependent_result_type)
<< FnDecl->getDeclName() << ExpectedResultType;
// Check that the result type is what we expect.
if (SemaRef.Context.getCanonicalType(ResultType) != ExpectedResultType)
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_invalid_result_type)
<< FnDecl->getDeclName() << ExpectedResultType;
// A function template must have at least 2 parameters.
if (FnDecl->getDescribedFunctionTemplate() && FnDecl->getNumParams() < 2)
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_template_too_few_parameters)
<< FnDecl->getDeclName();
// The function decl must have at least 1 parameter.
if (FnDecl->getNumParams() == 0)
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_delete_too_few_parameters)
<< FnDecl->getDeclName();
// Check the first parameter type is not dependent.
QualType FirstParamType = FnDecl->getParamDecl(0)->getType();
if (FirstParamType->isDependentType())
return SemaRef.Diag(FnDecl->getLocation(), DependentParamTypeDiag)
<< FnDecl->getDeclName() << ExpectedFirstParamType;
// Check that the first parameter type is what we expect.
if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() !=
ExpectedFirstParamType)
return SemaRef.Diag(FnDecl->getLocation(), InvalidParamTypeDiag)
<< FnDecl->getDeclName() << ExpectedFirstParamType;
return false;
}
static bool
CheckOperatorNewDeclaration(Sema &SemaRef, const FunctionDecl *FnDecl) {
// C++ [basic.stc.dynamic.allocation]p1:
// A program is ill-formed if an allocation function is declared in a
// namespace scope other than global scope or declared static in global
// scope.
if (CheckOperatorNewDeleteDeclarationScope(SemaRef, FnDecl))
return true;
CanQualType SizeTy =
SemaRef.Context.getCanonicalType(SemaRef.Context.getSizeType());
// C++ [basic.stc.dynamic.allocation]p1:
// The return type shall be void*. The first parameter shall have type
// std::size_t.
if (CheckOperatorNewDeleteTypes(SemaRef, FnDecl, SemaRef.Context.VoidPtrTy,
SizeTy,
diag::err_operator_new_dependent_param_type,
diag::err_operator_new_param_type))
return true;
// C++ [basic.stc.dynamic.allocation]p1:
// The first parameter shall not have an associated default argument.
if (FnDecl->getParamDecl(0)->hasDefaultArg())
return SemaRef.Diag(FnDecl->getLocation(),
diag::err_operator_new_default_arg)
<< FnDecl->getDeclName() << FnDecl->getParamDecl(0)->getDefaultArgRange();
return false;
}
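// Illustrative declarations checked by the code above (not from the source):
//   void *operator new(std::size_t);                  // OK at global scope
//   namespace N { void *operator new(std::size_t); }  // error: namespace scope
//   static void *operator new(std::size_t);  // error: static in global scope
//   int operator new(std::size_t);           // error: must return void*
//   void *operator new(std::size_t = 16);    // error: no default argument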
static bool
CheckOperatorDeleteDeclaration(Sema &SemaRef, FunctionDecl *FnDecl) {
// C++ [basic.stc.dynamic.deallocation]p1:
// A program is ill-formed if deallocation functions are declared in a
// namespace scope other than global scope or declared static in global
// scope.
if (CheckOperatorNewDeleteDeclarationScope(SemaRef, FnDecl))
return true;
// C++ [basic.stc.dynamic.deallocation]p2:
// Each deallocation function shall return void and its first parameter
// shall be void*.
if (CheckOperatorNewDeleteTypes(SemaRef, FnDecl, SemaRef.Context.VoidTy,
SemaRef.Context.VoidPtrTy,
diag::err_operator_delete_dependent_param_type,
diag::err_operator_delete_param_type))
return true;
return false;
}
/// CheckOverloadedOperatorDeclaration - Check whether the declaration
/// of this overloaded operator is well-formed. If so, returns false;
/// otherwise, emits appropriate diagnostics and returns true.
bool Sema::CheckOverloadedOperatorDeclaration(FunctionDecl *FnDecl) {
assert(FnDecl && FnDecl->isOverloadedOperator() &&
"Expected an overloaded operator declaration");
OverloadedOperatorKind Op = FnDecl->getOverloadedOperator();
// C++ [over.oper]p5:
// The allocation and deallocation functions, operator new,
// operator new[], operator delete and operator delete[], are
// described completely in 3.7.3. The attributes and restrictions
// found in the rest of this subclause do not apply to them unless
// explicitly stated in 3.7.3.
if (Op == OO_Delete || Op == OO_Array_Delete)
return CheckOperatorDeleteDeclaration(*this, FnDecl);
if (Op == OO_New || Op == OO_Array_New)
return CheckOperatorNewDeclaration(*this, FnDecl);
// C++ [over.oper]p6:
// An operator function shall either be a non-static member
// function or be a non-member function and have at least one
// parameter whose type is a class, a reference to a class, an
// enumeration, or a reference to an enumeration.
if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(FnDecl)) {
if (MethodDecl->isStatic())
return Diag(FnDecl->getLocation(),
diag::err_operator_overload_static) << FnDecl->getDeclName();
} else {
bool ClassOrEnumParam = false;
for (auto Param : FnDecl->parameters()) {
QualType ParamType = Param->getType().getNonReferenceType();
if (ParamType->isDependentType() || ParamType->isRecordType() ||
ParamType->isEnumeralType()) {
ClassOrEnumParam = true;
break;
}
}
if (!ClassOrEnumParam)
return Diag(FnDecl->getLocation(),
diag::err_operator_overload_needs_class_or_enum)
<< FnDecl->getDeclName();
}
// C++ [over.oper]p8:
// An operator function cannot have default arguments (8.3.6),
// except where explicitly stated below.
//
// Only the function-call operator allows default arguments
// (C++ [over.call]p1).
if (Op != OO_Call) {
for (auto Param : FnDecl->parameters()) {
if (Param->hasDefaultArg())
return Diag(Param->getLocation(),
diag::err_operator_overload_default_arg)
<< FnDecl->getDeclName() << Param->getDefaultArgRange();
}
}
static const bool OperatorUses[NUM_OVERLOADED_OPERATORS][3] = {
{ false, false, false }
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
, { Unary, Binary, MemberOnly }
#include "clang/Basic/OperatorKinds.def"
};
bool CanBeUnaryOperator = OperatorUses[Op][0];
bool CanBeBinaryOperator = OperatorUses[Op][1];
bool MustBeMemberOperator = OperatorUses[Op][2];
// C++ [over.oper]p8:
// [...] Operator functions cannot have more or fewer parameters
// than the number required for the corresponding operator, as
// described in the rest of this subclause.
unsigned NumParams = FnDecl->getNumParams()
+ (isa<CXXMethodDecl>(FnDecl)? 1 : 0);
if (Op != OO_Call &&
((NumParams == 1 && !CanBeUnaryOperator) ||
(NumParams == 2 && !CanBeBinaryOperator) ||
(NumParams < 1) || (NumParams > 2))) {
// We have the wrong number of parameters.
unsigned ErrorKind;
if (CanBeUnaryOperator && CanBeBinaryOperator) {
ErrorKind = 2; // 2 -> unary or binary.
} else if (CanBeUnaryOperator) {
ErrorKind = 0; // 0 -> unary
} else {
assert(CanBeBinaryOperator &&
"All non-call overloaded operators are unary or binary!");
ErrorKind = 1; // 1 -> binary
}
return Diag(FnDecl->getLocation(), diag::err_operator_overload_must_be)
<< FnDecl->getDeclName() << NumParams << ErrorKind;
}
// Overloaded operators other than operator() cannot be variadic.
if (Op != OO_Call &&
FnDecl->getType()->getAs<FunctionProtoType>()->isVariadic()) {
return Diag(FnDecl->getLocation(), diag::err_operator_overload_variadic)
<< FnDecl->getDeclName();
}
// Some operators must be non-static member functions.
if (MustBeMemberOperator && !isa<CXXMethodDecl>(FnDecl)) {
return Diag(FnDecl->getLocation(),
diag::err_operator_overload_must_be_member)
<< FnDecl->getDeclName();
}
// C++ [over.inc]p1:
// The user-defined function called operator++ implements the
// prefix and postfix ++ operator. If this function is a member
// function with no parameters, or a non-member function with one
// parameter of class or enumeration type, it defines the prefix
// increment operator ++ for objects of that type. If the function
// is a member function with one parameter (which shall be of type
// int) or a non-member function with two parameters (the second
// of which shall be of type int), it defines the postfix
// increment operator ++ for objects of that type.
if ((Op == OO_PlusPlus || Op == OO_MinusMinus) && NumParams == 2) {
ParmVarDecl *LastParam = FnDecl->getParamDecl(FnDecl->getNumParams() - 1);
QualType ParamType = LastParam->getType();
if (!ParamType->isSpecificBuiltinType(BuiltinType::Int) &&
!ParamType->isDependentType())
return Diag(LastParam->getLocation(),
diag::err_operator_overload_post_incdec_must_be_int)
<< LastParam->getType() << (Op == OO_MinusMinus);
}
return false;
}
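// Illustrative declarations rejected by the checks above (hypothetical):
//   struct X {
//     static X operator+(X, X);   // error: operator as a static member
//     X operator-(X, X);          // error: too many parameters ('this' counts)
//     X operator++(char);         // error: postfix parameter must be 'int'
//   };
//   bool operator&&(int, int);    // error: needs a class or enum parameter
//   X operator+(X, X = X());      // error: operators cannot have default args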
static bool
checkLiteralOperatorTemplateParameterList(Sema &SemaRef,
FunctionTemplateDecl *TpDecl) {
TemplateParameterList *TemplateParams = TpDecl->getTemplateParameters();
// Must have one or two template parameters.
if (TemplateParams->size() == 1) {
NonTypeTemplateParmDecl *PmDecl =
dyn_cast<NonTypeTemplateParmDecl>(TemplateParams->getParam(0));
// The template parameter must be a char parameter pack.
if (PmDecl && PmDecl->isTemplateParameterPack() &&
SemaRef.Context.hasSameType(PmDecl->getType(), SemaRef.Context.CharTy))
return false;
} else if (TemplateParams->size() == 2) {
TemplateTypeParmDecl *PmType =
dyn_cast<TemplateTypeParmDecl>(TemplateParams->getParam(0));
NonTypeTemplateParmDecl *PmArgs =
dyn_cast<NonTypeTemplateParmDecl>(TemplateParams->getParam(1));
// The second template parameter must be a parameter pack with the
// first template parameter as its type.
if (PmType && PmArgs && !PmType->isTemplateParameterPack() &&
PmArgs->isTemplateParameterPack()) {
const TemplateTypeParmType *TArgs =
PmArgs->getType()->getAs<TemplateTypeParmType>();
if (TArgs && TArgs->getDepth() == PmType->getDepth() &&
TArgs->getIndex() == PmType->getIndex()) {
if (!SemaRef.inTemplateInstantiation())
SemaRef.Diag(TpDecl->getLocation(),
diag::ext_string_literal_operator_template);
return false;
}
}
}
SemaRef.Diag(TpDecl->getTemplateParameters()->getSourceRange().getBegin(),
diag::err_literal_operator_template)
<< TpDecl->getTemplateParameters()->getSourceRange();
return true;
}
/// CheckLiteralOperatorDeclaration - Check whether the declaration
/// of this literal operator function is well-formed. If so, returns
/// false; otherwise, emits appropriate diagnostics and returns true.
bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
if (isa<CXXMethodDecl>(FnDecl)) {
Diag(FnDecl->getLocation(), diag::err_literal_operator_outside_namespace)
<< FnDecl->getDeclName();
return true;
}
if (FnDecl->isExternC()) {
Diag(FnDecl->getLocation(), diag::err_literal_operator_extern_c);
if (const LinkageSpecDecl *LSD =
FnDecl->getDeclContext()->getExternCContext())
Diag(LSD->getExternLoc(), diag::note_extern_c_begins_here);
return true;
}
// This might be the definition of a literal operator template.
FunctionTemplateDecl *TpDecl = FnDecl->getDescribedFunctionTemplate();
// This might be a specialization of a literal operator template.
if (!TpDecl)
TpDecl = FnDecl->getPrimaryTemplate();
// template <char...> type operator "" name() and
// template <class T, T...> type operator "" name() are the only valid
// template signatures, and the only valid signatures with no parameters.
if (TpDecl) {
if (FnDecl->param_size() != 0) {
Diag(FnDecl->getLocation(),
diag::err_literal_operator_template_with_params);
return true;
}
if (checkLiteralOperatorTemplateParameterList(*this, TpDecl))
return true;
} else if (FnDecl->param_size() == 1) {
const ParmVarDecl *Param = FnDecl->getParamDecl(0);
QualType ParamType = Param->getType().getUnqualifiedType();
// Only unsigned long long int, long double, any character type, and const
// char * are allowed as the only parameters.
if (ParamType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
ParamType->isSpecificBuiltinType(BuiltinType::LongDouble) ||
Context.hasSameType(ParamType, Context.CharTy) ||
Context.hasSameType(ParamType, Context.WideCharTy) ||
Context.hasSameType(ParamType, Context.Char16Ty) ||
Context.hasSameType(ParamType, Context.Char32Ty)) {
} else if (const PointerType *Ptr = ParamType->getAs<PointerType>()) {
QualType InnerType = Ptr->getPointeeType();
// Pointer parameter must be a const char *.
if (!(Context.hasSameType(InnerType.getUnqualifiedType(),
Context.CharTy) &&
InnerType.isConstQualified() && !InnerType.isVolatileQualified())) {
Diag(Param->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< ParamType << "'const char *'" << Param->getSourceRange();
return true;
}
} else if (ParamType->isRealFloatingType()) {
Diag(Param->getSourceRange().getBegin(), diag::err_literal_operator_param)
<< ParamType << Context.LongDoubleTy << Param->getSourceRange();
return true;
} else if (ParamType->isIntegerType()) {
Diag(Param->getSourceRange().getBegin(), diag::err_literal_operator_param)
<< ParamType << Context.UnsignedLongLongTy << Param->getSourceRange();
return true;
} else {
Diag(Param->getSourceRange().getBegin(),
diag::err_literal_operator_invalid_param)
<< ParamType << Param->getSourceRange();
return true;
}
} else if (FnDecl->param_size() == 2) {
FunctionDecl::param_iterator Param = FnDecl->param_begin();
// First, verify that the first parameter is correct.
QualType FirstParamType = (*Param)->getType().getUnqualifiedType();
// A two-parameter function must have a pointer to const as its
// first parameter; let's strip those qualifiers.
const PointerType *PT = FirstParamType->getAs<PointerType>();
if (!PT) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< FirstParamType << "'const char *'" << (*Param)->getSourceRange();
return true;
}
QualType PointeeType = PT->getPointeeType();
// The pointee type must be const-qualified (and not volatile).
if (!PointeeType.isConstQualified() || PointeeType.isVolatileQualified()) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< FirstParamType << "'const char *'" << (*Param)->getSourceRange();
return true;
}
QualType InnerType = PointeeType.getUnqualifiedType();
// Only const char *, const wchar_t*, const char16_t*, and const char32_t*
// are allowed as the first parameter to a two-parameter function
if (!(Context.hasSameType(InnerType, Context.CharTy) ||
Context.hasSameType(InnerType, Context.WideCharTy) ||
Context.hasSameType(InnerType, Context.Char16Ty) ||
Context.hasSameType(InnerType, Context.Char32Ty))) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< FirstParamType << "'const char *'" << (*Param)->getSourceRange();
return true;
}
// Move on to the second and final parameter.
++Param;
// The second parameter must be a std::size_t.
QualType SecondParamType = (*Param)->getType().getUnqualifiedType();
if (!Context.hasSameType(SecondParamType, Context.getSizeType())) {
Diag((*Param)->getSourceRange().getBegin(),
diag::err_literal_operator_param)
<< SecondParamType << Context.getSizeType()
<< (*Param)->getSourceRange();
return true;
}
} else {
Diag(FnDecl->getLocation(), diag::err_literal_operator_bad_param_count);
return true;
}
// Parameters are good.
// A parameter-declaration-clause containing a default argument is not
// equivalent to any of the permitted forms.
for (auto Param : FnDecl->parameters()) {
if (Param->hasDefaultArg()) {
Diag(Param->getDefaultArgRange().getBegin(),
diag::err_literal_operator_default_argument)
<< Param->getDefaultArgRange();
break;
}
}
StringRef LiteralName
= FnDecl->getDeclName().getCXXLiteralIdentifier()->getName();
if (LiteralName[0] != '_') {
// C++11 [usrlit.suffix]p1:
// Literal suffix identifiers that do not start with an underscore
// are reserved for future standardization.
Diag(FnDecl->getLocation(), diag::warn_user_literal_reserved)
<< StringLiteralParser::isValidUDSuffix(getLangOpts(), LiteralName);
}
return false;
}
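// A brief sketch of forms the checks above accept and reject (the suffix
// names here are illustrative):
//   #include <cstddef>
//   unsigned long long operator"" _kb(unsigned long long v) { return v * 1024; }
//   std::size_t operator"" _len(const char *, std::size_t n) { return n; }
//   int operator"" _bad(int);      // error: integer form takes unsigned long long
//   float operator"" _fl(float);   // error: floating form takes long double
//   unsigned long long operator"" kb(unsigned long long);  // warning: suffix
//                                  // without a leading '_' is reserved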
/// ActOnStartLinkageSpecification - Parsed the beginning of a C++
/// linkage specification, including the language and (if present)
/// the '{'. ExternLoc is the location of the 'extern', Lang is the
/// language string literal. LBraceLoc, if valid, provides the location of
/// the '{' brace. Otherwise, this linkage specification does not
/// have any braces.
Decl *Sema::ActOnStartLinkageSpecification(Scope *S, SourceLocation ExternLoc,
Expr *LangStr,
SourceLocation LBraceLoc) {
StringLiteral *Lit = cast<StringLiteral>(LangStr);
if (!Lit->isAscii()) {
Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii)
<< LangStr->getSourceRange();
return nullptr;
}
StringRef Lang = Lit->getString();
LinkageSpecDecl::LanguageIDs Language;
if (Lang == "C")
Language = LinkageSpecDecl::lang_c;
else if (Lang == "C++")
Language = LinkageSpecDecl::lang_cxx;
else {
Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_unknown)
<< LangStr->getSourceRange();
return nullptr;
}
// FIXME: Add all the various semantics of linkage specifications
LinkageSpecDecl *D = LinkageSpecDecl::Create(Context, CurContext, ExternLoc,
LangStr->getExprLoc(), Language,
LBraceLoc.isValid());
CurContext->addDecl(D);
PushDeclContext(S, D);
return D;
}
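// A minimal sketch of the linkage strings recognized above (the declarations
// are illustrative):
//   extern "C" void c_func(int);       // lang_c
//   extern "C++" { void cxx_func(); }  // lang_cxx, braced form
//   extern "D" void d_func();          // error: unknown linkage language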
/// ActOnFinishLinkageSpecification - Complete the definition of
/// the C++ linkage specification LinkageSpec. If RBraceLoc is
/// valid, it's the position of the closing '}' brace in a linkage
/// specification that uses braces.
Decl *Sema::ActOnFinishLinkageSpecification(Scope *S,
Decl *LinkageSpec,
SourceLocation RBraceLoc) {
if (RBraceLoc.isValid()) {
LinkageSpecDecl* LSDecl = cast<LinkageSpecDecl>(LinkageSpec);
LSDecl->setRBraceLoc(RBraceLoc);
}
PopDeclContext();
return LinkageSpec;
}
Decl *Sema::ActOnEmptyDeclaration(Scope *S,
AttributeList *AttrList,
SourceLocation SemiLoc) {
Decl *ED = EmptyDecl::Create(Context, CurContext, SemiLoc);
// Attribute declarations appertain to empty declaration so we handle
// them here.
if (AttrList)
ProcessDeclAttributeList(S, ED, AttrList);
CurContext->addDecl(ED);
return ED;
}
/// \brief Perform semantic analysis for the variable declaration that
/// occurs within a C++ catch clause, returning the newly-created
/// variable.
VarDecl *Sema::BuildExceptionDeclaration(Scope *S,
TypeSourceInfo *TInfo,
SourceLocation StartLoc,
SourceLocation Loc,
IdentifierInfo *Name) {
bool Invalid = false;
QualType ExDeclType = TInfo->getType();
// Arrays and functions decay.
if (ExDeclType->isArrayType())
ExDeclType = Context.getArrayDecayedType(ExDeclType);
else if (ExDeclType->isFunctionType())
ExDeclType = Context.getPointerType(ExDeclType);
// C++ 15.3p1: The exception-declaration shall not denote an incomplete type.
// The exception-declaration shall not denote a pointer or reference to an
// incomplete type, other than [cv] void*.
// N2844 forbids rvalue references.
if (!ExDeclType->isDependentType() && ExDeclType->isRValueReferenceType()) {
Diag(Loc, diag::err_catch_rvalue_ref);
Invalid = true;
}
if (ExDeclType->isVariablyModifiedType()) {
Diag(Loc, diag::err_catch_variably_modified) << ExDeclType;
Invalid = true;
}
QualType BaseType = ExDeclType;
int Mode = 0; // 0 for direct type, 1 for pointer, 2 for reference
unsigned DK = diag::err_catch_incomplete;
if (const PointerType *Ptr = BaseType->getAs<PointerType>()) {
BaseType = Ptr->getPointeeType();
Mode = 1;
DK = diag::err_catch_incomplete_ptr;
} else if (const ReferenceType *Ref = BaseType->getAs<ReferenceType>()) {
// For the purpose of error recovery, we treat rvalue refs like lvalue refs.
BaseType = Ref->getPointeeType();
Mode = 2;
DK = diag::err_catch_incomplete_ref;
}
if (!Invalid && (Mode == 0 || !BaseType->isVoidType()) &&
!BaseType->isDependentType() && RequireCompleteType(Loc, BaseType, DK))
Invalid = true;
if (!Invalid && !ExDeclType->isDependentType() &&
RequireNonAbstractType(Loc, ExDeclType,
diag::err_abstract_type_in_decl,
AbstractVariableType))
Invalid = true;
// Only the non-fragile NeXT runtime currently supports C++ catches
// of ObjC types, and no runtime supports catching ObjC types by value.
if (!Invalid && getLangOpts().ObjC1) {
QualType T = ExDeclType;
if (const ReferenceType *RT = T->getAs<ReferenceType>())
T = RT->getPointeeType();
if (T->isObjCObjectType()) {
Diag(Loc, diag::err_objc_object_catch);
Invalid = true;
} else if (T->isObjCObjectPointerType()) {
// FIXME: should this be a test for macosx-fragile specifically?
if (getLangOpts().ObjCRuntime.isFragile())
Diag(Loc, diag::warn_objc_pointer_cxx_catch_fragile);
}
}
VarDecl *ExDecl = VarDecl::Create(Context, CurContext, StartLoc, Loc, Name,
ExDeclType, TInfo, SC_None);
ExDecl->setExceptionVariable(true);
// In ARC, infer 'retaining' for variables of retainable type.
if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(ExDecl))
Invalid = true;
if (!Invalid && !ExDeclType->isDependentType()) {
if (const RecordType *recordType = ExDeclType->getAs<RecordType>()) {
// Insulate this from anything else we might currently be parsing.
EnterExpressionEvaluationContext scope(
*this, ExpressionEvaluationContext::PotentiallyEvaluated);
// C++ [except.handle]p16:
// The object declared in an exception-declaration or, if the
// exception-declaration does not specify a name, a temporary (12.2) is
// copy-initialized (8.5) from the exception object. [...]
// The object is destroyed when the handler exits, after the destruction
// of any automatic objects initialized within the handler.
//
// We just pretend to initialize the object with itself, then make sure
// it can be destroyed later.
QualType initType = Context.getExceptionObjectType(ExDeclType);
InitializedEntity entity =
InitializedEntity::InitializeVariable(ExDecl);
InitializationKind initKind =
InitializationKind::CreateCopy(Loc, SourceLocation());
Expr *opaqueValue =
new (Context) OpaqueValueExpr(Loc, initType, VK_LValue, OK_Ordinary);
InitializationSequence sequence(*this, entity, initKind, opaqueValue);
ExprResult result = sequence.Perform(*this, entity, initKind, opaqueValue);
if (result.isInvalid())
Invalid = true;
else {
// If the constructor used was non-trivial, set this as the
// "initializer".
CXXConstructExpr *construct = result.getAs<CXXConstructExpr>();
if (!construct->getConstructor()->isTrivial()) {
Expr *init = MaybeCreateExprWithCleanups(construct);
ExDecl->setInit(init);
}
// And make sure it's destructible.
FinalizeVarWithDestructor(ExDecl, recordType);
}
}
}
if (Invalid)
ExDecl->setInvalidDecl();
return ExDecl;
}
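// A minimal sketch of handlers the checks above reject or allow (the types
// are illustrative):
//   struct Incomplete;
//   void handlers() {
//     try { } catch (int &&r) { }        // error: rvalue reference (N2844)
//     try { } catch (Incomplete *p) { }  // error: pointer to incomplete type
//     try { } catch (void *p) { }        // OK: cv void* is explicitly allowed
//   }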
/// ActOnExceptionDeclarator - Parsed the exception-declarator in a C++ catch
/// handler.
Decl *Sema::ActOnExceptionDeclarator(Scope *S, Declarator &D) {
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
bool Invalid = D.isInvalidType();
// Check for unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_ExceptionType)) {
TInfo = Context.getTrivialTypeSourceInfo(Context.IntTy,
D.getIdentifierLoc());
Invalid = true;
}
IdentifierInfo *II = D.getIdentifier();
if (NamedDecl *PrevDecl = LookupSingleName(S, II, D.getIdentifierLoc(),
LookupOrdinaryName,
ForRedeclaration)) {
// The scope should be freshly made just for us. There is just no way
// it contains any previous declaration, except for function parameters in
// a function-try-block's catch statement.
assert(!S->isDeclScope(PrevDecl));
if (isDeclInScope(PrevDecl, CurContext, S)) {
Diag(D.getIdentifierLoc(), diag::err_redefinition)
<< D.getIdentifier();
Diag(PrevDecl->getLocation(), diag::note_previous_definition);
Invalid = true;
} else if (PrevDecl->isTemplateParameter())
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), PrevDecl);
}
if (D.getCXXScopeSpec().isSet() && !Invalid) {
Diag(D.getIdentifierLoc(), diag::err_qualified_catch_declarator)
<< D.getCXXScopeSpec().getRange();
Invalid = true;
}
VarDecl *ExDecl = BuildExceptionDeclaration(S, TInfo,
D.getLocStart(),
D.getIdentifierLoc(),
D.getIdentifier());
if (Invalid)
ExDecl->setInvalidDecl();
// Add the exception declaration into this scope.
if (II)
PushOnScopeChains(ExDecl, S);
else
CurContext->addDecl(ExDecl);
ProcessDeclAttributes(S, ExDecl, D);
return ExDecl;
}
Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
Expr *AssertExpr,
Expr *AssertMessageExpr,
SourceLocation RParenLoc) {
StringLiteral *AssertMessage =
AssertMessageExpr ? cast<StringLiteral>(AssertMessageExpr) : nullptr;
if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression))
return nullptr;
return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr,
AssertMessage, RParenLoc, false);
}
Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc,
Expr *AssertExpr,
StringLiteral *AssertMessage,
SourceLocation RParenLoc,
bool Failed) {
assert(AssertExpr != nullptr && "Expected non-null condition");
if (!AssertExpr->isTypeDependent() && !AssertExpr->isValueDependent() &&
!Failed) {
// In a static_assert-declaration, the constant-expression shall be a
// constant expression that can be contextually converted to bool.
ExprResult Converted = PerformContextuallyConvertToBool(AssertExpr);
if (Converted.isInvalid())
Failed = true;
llvm::APSInt Cond;
if (!Failed && VerifyIntegerConstantExpression(Converted.get(), &Cond,
diag::err_static_assert_expression_is_not_constant,
/*AllowFold=*/false).isInvalid())
Failed = true;
if (!Failed && !Cond) {
SmallString<256> MsgBuffer;
llvm::raw_svector_ostream Msg(MsgBuffer);
if (AssertMessage)
AssertMessage->printPretty(Msg, nullptr, getPrintingPolicy());
Diag(StaticAssertLoc, diag::err_static_assert_failed)
<< !AssertMessage << Msg.str() << AssertExpr->getSourceRange();
Failed = true;
}
}
ExprResult FullAssertExpr = ActOnFinishFullExpr(AssertExpr, StaticAssertLoc,
/*DiscardedValue*/false,
/*IsConstexpr*/true);
if (FullAssertExpr.isInvalid())
Failed = true;
else
AssertExpr = FullAssertExpr.get();
Decl *Decl = StaticAssertDecl::Create(Context, CurContext, StaticAssertLoc,
AssertExpr, AssertMessage, RParenLoc,
Failed);
CurContext->addDecl(Decl);
return Decl;
}
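// A short sketch of the evaluation above (the conditions are illustrative):
//   static_assert(sizeof(void *) >= 4, "pointers too small");  // OK if true
//   static_assert(2 + 2 == 5, "bad arithmetic");  // error: static_assert
//                                                 // failed "bad arithmetic"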
/// \brief Perform semantic analysis of the given friend type declaration.
///
/// \returns The friend declaration created for the given type.
FriendDecl *Sema::CheckFriendTypeDecl(SourceLocation LocStart,
SourceLocation FriendLoc,
TypeSourceInfo *TSInfo) {
assert(TSInfo && "NULL TypeSourceInfo for friend type declaration");
QualType T = TSInfo->getType();
SourceRange TypeRange = TSInfo->getTypeLoc().getLocalSourceRange();
// C++03 [class.friend]p2:
// An elaborated-type-specifier shall be used in a friend declaration
// for a class.*
//
// * The class-key of the elaborated-type-specifier is required.
if (!CodeSynthesisContexts.empty()) {
// Do not complain about the form of friend template types during any kind
// of code synthesis. For template instantiation, we will have complained
// when the template was defined.
} else {
if (!T->isElaboratedTypeSpecifier()) {
// If we evaluated the type to a record type, suggest putting
// a tag in front.
if (const RecordType *RT = T->getAs<RecordType>()) {
RecordDecl *RD = RT->getDecl();
SmallString<16> InsertionText(" ");
InsertionText += RD->getKindName();
Diag(TypeRange.getBegin(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_unelaborated_friend_type :
diag::ext_unelaborated_friend_type)
<< (unsigned) RD->getTagKind()
<< T
<< FixItHint::CreateInsertion(getLocForEndOfToken(FriendLoc),
InsertionText);
} else {
Diag(FriendLoc,
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_nonclass_type_friend :
diag::ext_nonclass_type_friend)
<< T
<< TypeRange;
}
} else if (T->getAs<EnumType>()) {
Diag(FriendLoc,
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_enum_friend :
diag::ext_enum_friend)
<< T
<< TypeRange;
}
// C++11 [class.friend]p3:
// A friend declaration that does not declare a function shall have one
// of the following forms:
// friend elaborated-type-specifier ;
// friend simple-type-specifier ;
// friend typename-specifier ;
if (getLangOpts().CPlusPlus11 && LocStart != FriendLoc)
Diag(FriendLoc, diag::err_friend_not_first_in_declaration) << T;
}
// If the type specifier in a friend declaration designates a (possibly
// cv-qualified) class type, that class is declared as a friend; otherwise,
// the friend declaration is ignored.
return FriendDecl::Create(Context, CurContext,
TSInfo->getTypeLoc().getLocStart(), TSInfo,
FriendLoc);
}
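// A rough sketch of friend type declarations as classified above (the names
// are illustrative):
//   class C; enum E : int { };
//   struct S {
//     friend class C;  // OK: elaborated-type-specifier
//     friend C;        // C++98 extension, fix-it inserts 'class'; OK in C++11
//     friend enum E;   // C++98 extension; OK in C++11
//   };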
/// Handle a friend tag declaration where the scope specifier was
/// templated.
Decl *Sema::ActOnTemplatedFriendTag(Scope *S, SourceLocation FriendLoc,
unsigned TagSpec, SourceLocation TagLoc,
CXXScopeSpec &SS,
IdentifierInfo *Name,
SourceLocation NameLoc,
AttributeList *Attr,
MultiTemplateParamsArg TempParamLists) {
TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForTypeSpec(TagSpec);
bool IsMemberSpecialization = false;
bool Invalid = false;
if (TemplateParameterList *TemplateParams =
MatchTemplateParametersToScopeSpecifier(
TagLoc, NameLoc, SS, nullptr, TempParamLists, /*friend*/ true,
IsMemberSpecialization, Invalid)) {
if (TemplateParams->size() > 0) {
// This is a declaration of a class template.
if (Invalid)
return nullptr;
return CheckClassTemplate(S, TagSpec, TUK_Friend, TagLoc, SS, Name,
NameLoc, Attr, TemplateParams, AS_public,
/*ModulePrivateLoc=*/SourceLocation(),
FriendLoc, TempParamLists.size() - 1,
TempParamLists.data()).get();
} else {
// The "template<>" header is extraneous.
Diag(TemplateParams->getTemplateLoc(), diag::err_template_tag_noparams)
<< TypeWithKeyword::getTagTypeKindName(Kind) << Name;
IsMemberSpecialization = true;
}
}
if (Invalid) return nullptr;
bool isAllExplicitSpecializations = true;
for (unsigned I = TempParamLists.size(); I-- > 0; ) {
if (TempParamLists[I]->size()) {
isAllExplicitSpecializations = false;
break;
}
}
// FIXME: don't ignore attributes.
// If it's explicit specializations all the way down, just forget
// about the template header and build an appropriate non-templated
// friend. TODO: for source fidelity, remember the headers.
if (isAllExplicitSpecializations) {
if (SS.isEmpty()) {
bool Owned = false;
bool IsDependent = false;
return ActOnTag(S, TagSpec, TUK_Friend, TagLoc, SS, Name, NameLoc,
Attr, AS_public,
/*ModulePrivateLoc=*/SourceLocation(),
MultiTemplateParamsArg(), Owned, IsDependent,
/*ScopedEnumKWLoc=*/SourceLocation(),
/*ScopedEnumUsesClassTag=*/false,
/*UnderlyingType=*/TypeResult(),
/*IsTypeSpecifier=*/false,
/*IsTemplateParamOrArg=*/false);
}
NestedNameSpecifierLoc QualifierLoc = SS.getWithLocInContext(Context);
ElaboratedTypeKeyword Keyword
= TypeWithKeyword::getKeywordForTagTypeKind(Kind);
QualType T = CheckTypenameType(Keyword, TagLoc, QualifierLoc,
*Name, NameLoc);
if (T.isNull())
return nullptr;
TypeSourceInfo *TSI = Context.CreateTypeSourceInfo(T);
if (isa<DependentNameType>(T)) {
DependentNameTypeLoc TL =
TSI->getTypeLoc().castAs<DependentNameTypeLoc>();
TL.setElaboratedKeywordLoc(TagLoc);
TL.setQualifierLoc(QualifierLoc);
TL.setNameLoc(NameLoc);
} else {
ElaboratedTypeLoc TL = TSI->getTypeLoc().castAs<ElaboratedTypeLoc>();
TL.setElaboratedKeywordLoc(TagLoc);
TL.setQualifierLoc(QualifierLoc);
TL.getNamedTypeLoc().castAs<TypeSpecTypeLoc>().setNameLoc(NameLoc);
}
FriendDecl *Friend = FriendDecl::Create(Context, CurContext, NameLoc,
TSI, FriendLoc, TempParamLists);
Friend->setAccess(AS_public);
CurContext->addDecl(Friend);
return Friend;
}
assert(SS.isNotEmpty() && "valid templated tag with no SS and no direct?");
// Handle the case of a templated-scope friend class. e.g.
// template <class T> class A<T>::B;
// FIXME: we don't support these right now.
Diag(NameLoc, diag::warn_template_qualified_friend_unsupported)
<< SS.getScopeRep() << SS.getRange() << cast<CXXRecordDecl>(CurContext);
ElaboratedTypeKeyword ETK = TypeWithKeyword::getKeywordForTagTypeKind(Kind);
QualType T = Context.getDependentNameType(ETK, SS.getScopeRep(), Name);
TypeSourceInfo *TSI = Context.CreateTypeSourceInfo(T);
DependentNameTypeLoc TL = TSI->getTypeLoc().castAs<DependentNameTypeLoc>();
TL.setElaboratedKeywordLoc(TagLoc);
TL.setQualifierLoc(SS.getWithLocInContext(Context));
TL.setNameLoc(NameLoc);
FriendDecl *Friend = FriendDecl::Create(Context, CurContext, NameLoc,
TSI, FriendLoc, TempParamLists);
Friend->setAccess(AS_public);
Friend->setUnsupportedFriend(true);
CurContext->addDecl(Friend);
return Friend;
}
/// Handle a friend type declaration. This works in tandem with
/// ActOnTag.
///
/// Notes on friend class templates:
///
/// We generally treat friend class declarations as if they were
/// declaring a class. So, for example, the elaborated type specifier
/// in a friend declaration is required to obey the restrictions of a
/// class-head (i.e. no typedefs in the scope chain), template
/// parameters are required to match up with simple template-ids, &c.
/// However, unlike when declaring a template specialization, it's
/// okay to refer to a template specialization without an empty
/// template parameter declaration, e.g.
/// friend class A<T>::B<unsigned>;
/// We permit this as a special case; if there are any template
/// parameters present at all, require proper matching, i.e.
/// template <> template \<class T> friend class A<int>::B;
Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS,
MultiTemplateParamsArg TempParams) {
SourceLocation Loc = DS.getLocStart();
assert(DS.isFriendSpecified());
assert(DS.getStorageClassSpec() == DeclSpec::SCS_unspecified);
// Try to convert the decl specifier to a type. This works for
// friend templates because ActOnTag never produces a ClassTemplateDecl
// for a TUK_Friend.
Declarator TheDeclarator(DS, Declarator::MemberContext);
TypeSourceInfo *TSI = GetTypeForDeclarator(TheDeclarator, S);
QualType T = TSI->getType();
if (TheDeclarator.isInvalidType())
return nullptr;
if (DiagnoseUnexpandedParameterPack(Loc, TSI, UPPC_FriendDeclaration))
return nullptr;
// This is definitely an error in C++98. It's probably meant to
// be forbidden in C++0x, too, but the specification is just
// poorly written.
//
// The problem is with declarations like the following:
// template <T> friend A<T>::foo;
// where deciding whether a class C is a friend or not now hinges
// on whether there exists an instantiation of A that causes
// 'foo' to equal C. There are restrictions on class-heads
// (which we declare (by fiat) elaborated friend declarations to
// be) that makes this tractable.
//
// FIXME: handle "template <> friend class A<T>;", which
// is possibly well-formed? Who even knows?
if (TempParams.size() && !T->isElaboratedTypeSpecifier()) {
Diag(Loc, diag::err_tagless_friend_type_template)
<< DS.getSourceRange();
return nullptr;
}
// C++98 [class.friend]p1: A friend of a class is a function
// or class that is not a member of the class . . .
// This is fixed in DR77, which just barely didn't make the C++03
// deadline. It's also a very silly restriction that seriously
// affects inner classes and which nobody else seems to implement;
// thus we never diagnose it, not even in -pedantic.
//
// But note that we could warn about it: it's always useless to
// friend one of your own members (it's not, however, worthless to
// friend a member of an arbitrary specialization of your template).
Decl *D;
if (!TempParams.empty())
D = FriendTemplateDecl::Create(Context, CurContext, Loc,
TempParams,
TSI,
DS.getFriendSpecLoc());
else
D = CheckFriendTypeDecl(Loc, DS.getFriendSpecLoc(), TSI);
if (!D)
return nullptr;
D->setAccess(AS_public);
CurContext->addDecl(D);
return D;
}
NamedDecl *Sema::ActOnFriendFunctionDecl(Scope *S, Declarator &D,
MultiTemplateParamsArg TemplateParams) {
const DeclSpec &DS = D.getDeclSpec();
assert(DS.isFriendSpecified());
assert(DS.getStorageClassSpec() == DeclSpec::SCS_unspecified);
SourceLocation Loc = D.getIdentifierLoc();
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
// C++ [class.friend]p1
// A friend of a class is a function or class....
// Note that this sees through typedefs, which is intended.
// It *doesn't* see through dependent types, which is correct
// according to [temp.arg.type]p3:
// If a declaration acquires a function type through a
// type dependent on a template-parameter and this causes
// a declaration that does not use the syntactic form of a
// function declarator to have a function type, the program
// is ill-formed.
if (!TInfo->getType()->isFunctionType()) {
Diag(Loc, diag::err_unexpected_friend);
// It might be worthwhile to try to recover by creating an
// appropriate declaration.
return nullptr;
}
// C++ [namespace.memdef]p3
// - If a friend declaration in a non-local class first declares a
// class or function, the friend class or function is a member
// of the innermost enclosing namespace.
// - The name of the friend is not found by simple name lookup
// until a matching declaration is provided in that namespace
// scope (either before or after the class declaration granting
// friendship).
// - If a friend function is called, its name may be found by the
// name lookup that considers functions from namespaces and
// classes associated with the types of the function arguments.
// - When looking for a prior declaration of a class or a function
// declared as a friend, scopes outside the innermost enclosing
// namespace scope are not considered.
CXXScopeSpec &SS = D.getCXXScopeSpec();
DeclarationNameInfo NameInfo = GetNameForDeclarator(D);
DeclarationName Name = NameInfo.getName();
assert(Name);
// Check for unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(Loc, TInfo, UPPC_FriendDeclaration) ||
DiagnoseUnexpandedParameterPack(NameInfo, UPPC_FriendDeclaration) ||
DiagnoseUnexpandedParameterPack(SS, UPPC_FriendDeclaration))
return nullptr;
// The context we found the declaration in, or in which we should
// create the declaration.
DeclContext *DC;
Scope *DCScope = S;
LookupResult Previous(*this, NameInfo, LookupOrdinaryName,
ForRedeclaration);
// There are five cases here.
// - There's no scope specifier and we're in a local class. Only look
// for functions declared in the immediately-enclosing block scope.
// We recover from invalid scope qualifiers as if they just weren't there.
FunctionDecl *FunctionContainingLocalClass = nullptr;
if ((SS.isInvalid() || !SS.isSet()) &&
(FunctionContainingLocalClass =
cast<CXXRecordDecl>(CurContext)->isLocalClass())) {
// C++11 [class.friend]p11:
// If a friend declaration appears in a local class and the name
// specified is an unqualified name, a prior declaration is
// looked up without considering scopes that are outside the
// innermost enclosing non-class scope. For a friend function
// declaration, if there is no prior declaration, the program is
// ill-formed.
// Find the innermost enclosing non-class scope. This is the block
// scope containing the local class definition (or for a nested class,
// the outer local class).
DCScope = S->getFnParent();
// Look up the function name in the scope.
Previous.clear(LookupLocalFriendName);
LookupName(Previous, S, /*AllowBuiltinCreation*/false);
if (!Previous.empty()) {
// All possible previous declarations must have the same context:
// either they were declared at block scope or they are members of
// one of the enclosing local classes.
DC = Previous.getRepresentativeDecl()->getDeclContext();
} else {
// This is ill-formed, but provide the context that we would have
// declared the function in, if we were permitted to, for error recovery.
DC = FunctionContainingLocalClass;
}
adjustContextForLocalExternDecl(DC);
// C++ [class.friend]p6:
// A function can be defined in a friend declaration of a class if and
// only if the class is a non-local class (9.8), the function name is
// unqualified, and the function has namespace scope.
if (D.isFunctionDefinition()) {
Diag(NameInfo.getBeginLoc(), diag::err_friend_def_in_local_class);
}
// - There's no scope specifier, in which case we just go to the
// appropriate scope and look for a function or function template
// there as appropriate.
} else if (SS.isInvalid() || !SS.isSet()) {
// C++11 [namespace.memdef]p3:
// If the name in a friend declaration is neither qualified nor
// a template-id and the declaration is a function or an
// elaborated-type-specifier, the lookup to determine whether
// the entity has been previously declared shall not consider
// any scopes outside the innermost enclosing namespace.
bool isTemplateId = D.getName().getKind() == UnqualifiedId::IK_TemplateId;
// Find the appropriate context according to the above.
DC = CurContext;
// Skip class contexts. If someone can cite chapter and verse
// for this behavior, that would be nice --- it's what GCC and
// EDG do, and it seems like a reasonable intent, but the spec
// really only says that checks for unqualified existing
// declarations should stop at the nearest enclosing namespace,
// not that they should only consider the nearest enclosing
// namespace.
while (DC->isRecord())
DC = DC->getParent();
DeclContext *LookupDC = DC;
while (LookupDC->isTransparentContext())
LookupDC = LookupDC->getParent();
while (true) {
LookupQualifiedName(Previous, LookupDC);
if (!Previous.empty()) {
DC = LookupDC;
break;
}
if (isTemplateId) {
if (isa<TranslationUnitDecl>(LookupDC)) break;
} else {
if (LookupDC->isFileContext()) break;
}
LookupDC = LookupDC->getParent();
}
DCScope = getScopeForDeclContext(S, DC);
// - There's a non-dependent scope specifier, in which case we
// compute it and do a previous lookup there for a function
// or function template.
} else if (!SS.getScopeRep()->isDependent()) {
DC = computeDeclContext(SS);
if (!DC) return nullptr;
if (RequireCompleteDeclContext(SS, DC)) return nullptr;
LookupQualifiedName(Previous, DC);
// Ignore things found implicitly in the wrong scope.
// TODO: better diagnostics for this case. Suggesting the right
// qualified scope would be nice...
LookupResult::Filter F = Previous.makeFilter();
while (F.hasNext()) {
NamedDecl *D = F.next();
if (!DC->InEnclosingNamespaceSetOf(
D->getDeclContext()->getRedeclContext()))
F.erase();
}
F.done();
if (Previous.empty()) {
D.setInvalidType();
Diag(Loc, diag::err_qualified_friend_not_found)
<< Name << TInfo->getType();
return nullptr;
}
// C++ [class.friend]p1: A friend of a class is a function or
// class that is not a member of the class . . .
if (DC->Equals(CurContext))
Diag(DS.getFriendSpecLoc(),
getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_friend_is_member :
diag::err_friend_is_member);
if (D.isFunctionDefinition()) {
// C++ [class.friend]p6:
// A function can be defined in a friend declaration of a class if and
// only if the class is a non-local class (9.8), the function name is
// unqualified, and the function has namespace scope.
SemaDiagnosticBuilder DB
= Diag(SS.getRange().getBegin(), diag::err_qualified_friend_def);
DB << SS.getScopeRep();
if (DC->isFileContext())
DB << FixItHint::CreateRemoval(SS.getRange());
SS.clear();
}
// - There's a scope specifier that does not match any template
// parameter lists, in which case we use some arbitrary context,
// create a method or method template, and wait for instantiation.
// - There's a scope specifier that does match some template
// parameter lists, which we don't handle right now.
} else {
if (D.isFunctionDefinition()) {
// C++ [class.friend]p6:
// A function can be defined in a friend declaration of a class if and
// only if the class is a non-local class (9.8), the function name is
// unqualified, and the function has namespace scope.
Diag(SS.getRange().getBegin(), diag::err_qualified_friend_def)
<< SS.getScopeRep();
}
DC = CurContext;
assert(isa<CXXRecordDecl>(DC) && "friend declaration not in class?");
}
if (!DC->isRecord()) {
int DiagArg = -1;
switch (D.getName().getKind()) {
case UnqualifiedId::IK_ConstructorTemplateId:
case UnqualifiedId::IK_ConstructorName:
DiagArg = 0;
break;
case UnqualifiedId::IK_DestructorName:
DiagArg = 1;
break;
case UnqualifiedId::IK_ConversionFunctionId:
DiagArg = 2;
break;
case UnqualifiedId::IK_DeductionGuideName:
DiagArg = 3;
break;
case UnqualifiedId::IK_Identifier:
case UnqualifiedId::IK_ImplicitSelfParam:
case UnqualifiedId::IK_LiteralOperatorId:
case UnqualifiedId::IK_OperatorFunctionId:
case UnqualifiedId::IK_TemplateId:
break;
}
// This implies that it has to be an operator or function.
if (DiagArg >= 0) {
Diag(Loc, diag::err_introducing_special_friend) << DiagArg;
return nullptr;
}
}
// FIXME: This is an egregious hack to cope with cases where the scope stack
// does not contain the declaration context, i.e., in an out-of-line
// definition of a class.
Scope FakeDCScope(S, Scope::DeclScope, Diags);
if (!DCScope) {
FakeDCScope.setEntity(DC);
DCScope = &FakeDCScope;
}
bool AddToScope = true;
NamedDecl *ND = ActOnFunctionDeclarator(DCScope, D, DC, TInfo, Previous,
TemplateParams, AddToScope);
if (!ND) return nullptr;
assert(ND->getLexicalDeclContext() == CurContext);
// If we performed typo correction, we might have added a scope specifier
// and changed the decl context.
DC = ND->getDeclContext();
// Add the function declaration to the appropriate lookup tables,
// adjusting the redeclarations list as necessary. We don't
// want to do this yet if the friending class is dependent.
//
// Also update the scope-based lookup if the target context's
// lookup context is in lexical scope.
if (!CurContext->isDependentContext()) {
DC = DC->getRedeclContext();
DC->makeDeclVisibleInContext(ND);
if (Scope *EnclosingScope = getScopeForDeclContext(S, DC))
PushOnScopeChains(ND, EnclosingScope, /*AddToContext=*/ false);
}
FriendDecl *FrD = FriendDecl::Create(Context, CurContext,
D.getIdentifierLoc(), ND,
DS.getFriendSpecLoc());
FrD->setAccess(AS_public);
CurContext->addDecl(FrD);
if (ND->isInvalidDecl()) {
FrD->setInvalidDecl();
} else {
if (DC->isRecord()) CheckFriendAccess(ND);
FunctionDecl *FD;
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(ND))
FD = FTD->getTemplatedDecl();
else
FD = cast<FunctionDecl>(ND);
// C++11 [dcl.fct.default]p4: If a friend declaration specifies a
// default argument expression, that declaration shall be a definition
// and shall be the only declaration of the function or function
// template in the translation unit.
if (functionDeclHasDefaultArgument(FD)) {
// We can't look at FD->getPreviousDecl() because it may not have been set
// if we're in a dependent context. If the function is known to be a
// redeclaration, we will have narrowed Previous down to the right decl.
if (D.isRedeclaration()) {
Diag(FD->getLocation(), diag::err_friend_decl_with_def_arg_redeclared);
Diag(Previous.getRepresentativeDecl()->getLocation(),
diag::note_previous_declaration);
} else if (!D.isFunctionDefinition())
Diag(FD->getLocation(), diag::err_friend_decl_with_def_arg_must_be_def);
}
// Mark templated-scope function declarations as unsupported.
if (FD->getNumTemplateParameterLists() && SS.isValid()) {
Diag(FD->getLocation(), diag::warn_template_qualified_friend_unsupported)
<< SS.getScopeRep() << SS.getRange()
<< cast<CXXRecordDecl>(CurContext);
FrD->setUnsupportedFriend(true);
}
}
return ND;
}
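// A condensed sketch of the default-argument rule enforced above (the names
// are illustrative):
//   struct A {
//     friend void f(int = 0) { }  // OK: the friend declaration is a definition
//   };
//   struct B {
//     friend void g(int = 0);     // error: must be a definition and the only
//   };                            // declaration of 'g' in the translation unit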
void Sema::SetDeclDeleted(Decl *Dcl, SourceLocation DelLoc) {
AdjustDeclIfTemplate(Dcl);
FunctionDecl *Fn = dyn_cast_or_null<FunctionDecl>(Dcl);
if (!Fn) {
Diag(DelLoc, diag::err_deleted_non_function);
return;
}
// A deleted function does not have a body.
Fn->setWillHaveBody(false);
if (const FunctionDecl *Prev = Fn->getPreviousDecl()) {
// Don't consider the implicit declaration we generate for explicit
// specializations. FIXME: Do not generate these implicit declarations.
if ((Prev->getTemplateSpecializationKind() != TSK_ExplicitSpecialization ||
Prev->getPreviousDecl()) &&
!Prev->isDefined()) {
Diag(DelLoc, diag::err_deleted_decl_not_first);
Diag(Prev->getLocation().isInvalid() ? DelLoc : Prev->getLocation(),
Prev->isImplicit() ? diag::note_previous_implicit_declaration
: diag::note_previous_declaration);
}
// If the declaration wasn't the first, we delete the function anyway for
// recovery.
Fn = Fn->getCanonicalDecl();
}
// dllimport/dllexport cannot be deleted.
if (const InheritableAttr *DLLAttr = getDLLAttr(Fn)) {
Diag(Fn->getLocation(), diag::err_attribute_dll_deleted) << DLLAttr;
Fn->setInvalidDecl();
}
if (Fn->isDeleted())
return;
// See if we're deleting a function which is already known to override a
// non-deleted virtual function.
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn)) {
bool IssuedDiagnostic = false;
for (CXXMethodDecl::method_iterator I = MD->begin_overridden_methods(),
E = MD->end_overridden_methods();
I != E; ++I) {
if (!(*I)->isDeleted()) {
if (!IssuedDiagnostic) {
Diag(DelLoc, diag::err_deleted_override) << MD->getDeclName();
IssuedDiagnostic = true;
}
Diag((*I)->getLocation(), diag::note_overridden_virtual_function);
}
}
// If this function was implicitly deleted because it was defaulted,
// explain why it was deleted.
if (IssuedDiagnostic && MD->isDefaulted())
ShouldDeleteSpecialMember(MD, getSpecialMember(MD), nullptr,
/*Diagnose*/true);
}
// C++11 [basic.start.main]p3:
// A program that defines main as deleted [...] is ill-formed.
if (Fn->isMain())
Diag(DelLoc, diag::err_deleted_main);
// C++11 [dcl.fct.def.delete]p4:
// A deleted function is implicitly inline.
Fn->setImplicitlyInline();
Fn->setDeletedAsWritten();
}
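// A minimal sketch of the rejections above (the names are illustrative):
//   struct Base { virtual void f(); };
//   struct Derived : Base {
//     void f() = delete;  // error: deleting a function that overrides a
//   };                    // non-deleted virtual function
//   void g();
//   void g() = delete;    // error: deleted definition must be first declaration
//   int main() = delete;  // error: 'main' is not allowed to be deleted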
void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) {
CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(Dcl);
if (MD) {
if (MD->getParent()->isDependentType()) {
MD->setDefaulted();
MD->setExplicitlyDefaulted();
return;
}
CXXSpecialMember Member = getSpecialMember(MD);
if (Member == CXXInvalid) {
if (!MD->isInvalidDecl())
Diag(DefaultLoc, diag::err_default_special_members);
return;
}
MD->setDefaulted();
MD->setExplicitlyDefaulted();
// Unset that we will have a body for this function. We might not,
// if it turns out to be trivial, and we don't need this marking now
// that we've marked it as defaulted.
MD->setWillHaveBody(false);
// If this definition appears within the record, do the checking when
// the record is complete.
const FunctionDecl *Primary = MD;
if (const FunctionDecl *Pattern = MD->getTemplateInstantiationPattern())
// Ask the template instantiation pattern that actually had the
// '= default' on it.
Primary = Pattern;
// If the method was defaulted on its first declaration, we will have
// already performed the checking in CheckCompletedCXXClass. Such a
// declaration doesn't trigger an implicit definition.
if (Primary->getCanonicalDecl()->isDefaulted())
return;
CheckExplicitlyDefaultedSpecialMember(MD);
if (!MD->isInvalidDecl())
DefineImplicitSpecialMember(*this, MD, DefaultLoc);
} else {
Diag(DefaultLoc, diag::err_default_special_members);
}
}
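// A small sketch of the explicit-default checks above (the names are
// illustrative):
//   struct S {
//     S() = default;       // OK: a special member may be explicitly defaulted
//     void f() = default;  // error: only special member functions may be
//   };                     // defaulted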
static void SearchForReturnInStmt(Sema &Self, Stmt *S) {
for (Stmt *SubStmt : S->children()) {
if (!SubStmt)
continue;
if (isa<ReturnStmt>(SubStmt))
Self.Diag(SubStmt->getLocStart(),
diag::err_return_in_constructor_handler);
if (!isa<Expr>(SubStmt))
SearchForReturnInStmt(Self, SubStmt);
}
}
void Sema::DiagnoseReturnInConstructorExceptionHandler(CXXTryStmt *TryBlock) {
for (unsigned I = 0, E = TryBlock->getNumHandlers(); I != E; ++I) {
CXXCatchStmt *Handler = TryBlock->getHandler(I);
SearchForReturnInStmt(*this, Handler);
}
}
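// A minimal sketch of the pattern diagnosed above (the names are
// illustrative):
//   struct S {
//     S() try { } catch (...) { return; }  // error: return in the handler of
//   };                                     // a constructor function-try-block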
bool Sema::CheckOverridingFunctionAttributes(const CXXMethodDecl *New,
const CXXMethodDecl *Old) {
const FunctionType *NewFT = New->getType()->getAs<FunctionType>();
const FunctionType *OldFT = Old->getType()->getAs<FunctionType>();
CallingConv NewCC = NewFT->getCallConv(), OldCC = OldFT->getCallConv();
// If the calling conventions match, everything is fine
if (NewCC == OldCC)
return false;
// If the calling conventions mismatch because the new function is static,
// suppress the calling convention mismatch error; the error about static
// function override (err_static_overrides_virtual from
// Sema::CheckFunctionDeclaration) is more clear.
if (New->getStorageClass() == SC_Static)
return false;
Diag(New->getLocation(),
diag::err_conflicting_overriding_cc_attributes)
<< New->getDeclName() << New->getType() << Old->getType();
Diag(Old->getLocation(), diag::note_overridden_virtual_function);
return true;
}
bool Sema::CheckOverridingFunctionReturnType(const CXXMethodDecl *New,
const CXXMethodDecl *Old) {
QualType NewTy = New->getType()->getAs<FunctionType>()->getReturnType();
QualType OldTy = Old->getType()->getAs<FunctionType>()->getReturnType();
if (Context.hasSameType(NewTy, OldTy) ||
NewTy->isDependentType() || OldTy->isDependentType())
return false;
// Check if the return types are covariant
QualType NewClassTy, OldClassTy;
// Both types must be pointers or references to classes.
if (const PointerType *NewPT = NewTy->getAs<PointerType>()) {
if (const PointerType *OldPT = OldTy->getAs<PointerType>()) {
NewClassTy = NewPT->getPointeeType();
OldClassTy = OldPT->getPointeeType();
}
} else if (const ReferenceType *NewRT = NewTy->getAs<ReferenceType>()) {
if (const ReferenceType *OldRT = OldTy->getAs<ReferenceType>()) {
if (NewRT->getTypeClass() == OldRT->getTypeClass()) {
NewClassTy = NewRT->getPointeeType();
OldClassTy = OldRT->getPointeeType();
}
}
}
// The return types are not both pointers or both references to a class type.
if (NewClassTy.isNull()) {
Diag(New->getLocation(),
diag::err_different_return_type_for_overriding_virtual_function)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
if (!Context.hasSameUnqualifiedType(NewClassTy, OldClassTy)) {
// C++14 [class.virtual]p8:
// If the class type in the covariant return type of D::f differs from
// that of B::f, the class type in the return type of D::f shall be
// complete at the point of declaration of D::f or shall be the class
// type D.
if (const RecordType *RT = NewClassTy->getAs<RecordType>()) {
if (!RT->isBeingDefined() &&
RequireCompleteType(New->getLocation(), NewClassTy,
diag::err_covariant_return_incomplete,
New->getDeclName()))
return true;
}
// Check if the new class derives from the old class.
if (!IsDerivedFrom(New->getLocation(), NewClassTy, OldClassTy)) {
Diag(New->getLocation(), diag::err_covariant_return_not_derived)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
// Check that the conversion from derived to base is valid.
if (CheckDerivedToBaseConversion(
NewClassTy, OldClassTy,
diag::err_covariant_return_inaccessible_base,
diag::err_covariant_return_ambiguous_derived_to_base_conv,
New->getLocation(), New->getReturnTypeSourceRange(),
New->getDeclName(), nullptr)) {
// FIXME: this note won't trigger for delayed access control
// diagnostics, and it's impossible to get an undelayed error
// here from access control during the original parse because
// the ParsingDeclSpec/ParsingDeclarator are still in scope.
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
}
// The qualifiers of the return types must be the same.
if (NewTy.getLocalCVRQualifiers() != OldTy.getLocalCVRQualifiers()) {
Diag(New->getLocation(),
diag::err_covariant_return_type_different_qualifications)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
// The new class type must have the same or fewer qualifiers than the old type.
if (NewClassTy.isMoreQualifiedThan(OldClassTy)) {
Diag(New->getLocation(),
diag::err_covariant_return_type_class_type_more_qualified)
<< New->getDeclName() << NewTy << OldTy
<< New->getReturnTypeSourceRange();
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
}
return false;
}
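// A compact sketch of the covariance rules checked above (the names are
// illustrative):
//   struct B { virtual B *clone(); };
//   struct D : B {
//     D *clone() override;  // OK: covariant, D derives from B
//   };
//   struct U { };
//   struct E : B {
//     U *clone() override;  // error: U is not derived from B
//   };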
/// \brief Mark the given method pure.
///
/// \param Method the method to be marked pure.
///
/// \param InitRange the source range that covers the "0" initializer.
bool Sema::CheckPureMethod(CXXMethodDecl *Method, SourceRange InitRange) {
SourceLocation EndLoc = InitRange.getEnd();
if (EndLoc.isValid())
Method->setRangeEnd(EndLoc);
if (Method->isVirtual() || Method->getParent()->isDependentContext()) {
Method->setPure();
return false;
}
if (!Method->isInvalidDecl())
Diag(Method->getLocation(), diag::err_non_virtual_pure)
<< Method->getDeclName() << InitRange;
return true;
}
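// A minimal sketch of the pure-specifier check above (the names are
// illustrative):
//   struct S {
//     virtual void f() = 0;  // OK: pure virtual member function
//     void g() = 0;          // error: 'g' is not virtual and cannot be pure
//   };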
void Sema::ActOnPureSpecifier(Decl *D, SourceLocation ZeroLoc) {
if (D->getFriendObjectKind())
Diag(D->getLocation(), diag::err_pure_friend);
else if (auto *M = dyn_cast<CXXMethodDecl>(D))
CheckPureMethod(M, ZeroLoc);
else
Diag(D->getLocation(), diag::err_illegal_initializer);
}
/// \brief Determine whether the given declaration is a static data member.
static bool isStaticDataMember(const Decl *D) {
if (const VarDecl *Var = dyn_cast_or_null<VarDecl>(D))
return Var->isStaticDataMember();
return false;
}
/// ActOnCXXEnterDeclInitializer - Invoked when we are about to parse
/// an initializer for the out-of-line declaration 'Dcl'. The scope
/// is a fresh scope pushed for just this purpose.
///
/// After this method is called, according to [C++ 3.4.1p13], if 'Dcl' is a
/// static data member of class X, names should be looked up in the scope of
/// class X.
void Sema::ActOnCXXEnterDeclInitializer(Scope *S, Decl *D) {
// If there is no declaration, there was an error parsing it.
if (!D || D->isInvalidDecl())
return;
// We will always have a nested name specifier here, but this declaration
// might not be out of line if the specifier names the current namespace:
// extern int n;
// int ::n = 0;
if (D->isOutOfLine())
EnterDeclaratorContext(S, D->getDeclContext());
// If we are parsing the initializer for a static data member, push a
// new expression evaluation context that is associated with this static
// data member.
if (isStaticDataMember(D))
PushExpressionEvaluationContext(
ExpressionEvaluationContext::PotentiallyEvaluated, D);
}
/// ActOnCXXExitDeclInitializer - Invoked after we are finished parsing an
/// initializer for the out-of-line declaration 'D'.
void Sema::ActOnCXXExitDeclInitializer(Scope *S, Decl *D) {
// If there is no declaration, there was an error parsing it.
if (!D || D->isInvalidDecl())
return;
if (isStaticDataMember(D))
PopExpressionEvaluationContext();
if (D->isOutOfLine())
ExitDeclaratorContext(S);
}
/// ActOnCXXConditionDeclarationExpr - Parsed a condition declaration of a
/// C++ if/switch/while/for statement.
/// e.g: "if (int x = f()) {...}"
DeclResult Sema::ActOnCXXConditionDeclaration(Scope *S, Declarator &D) {
// C++ 6.4p2:
// The declarator shall not specify a function or an array.
// The type-specifier-seq shall not contain typedef and shall not declare a
// new class or enumeration.
assert(D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
"Parser allowed 'typedef' as storage class of condition decl.");
Decl *Dcl = ActOnDeclarator(S, D);
if (!Dcl)
return true;
if (isa<FunctionDecl>(Dcl)) { // The declarator shall not specify a function.
Diag(Dcl->getLocation(), diag::err_invalid_use_of_function_type)
<< D.getSourceRange();
return true;
}
return Dcl;
}
void Sema::LoadExternalVTableUses() {
if (!ExternalSource)
return;
SmallVector<ExternalVTableUse, 4> VTables;
ExternalSource->ReadUsedVTables(VTables);
SmallVector<VTableUse, 4> NewUses;
for (unsigned I = 0, N = VTables.size(); I != N; ++I) {
llvm::DenseMap<CXXRecordDecl *, bool>::iterator Pos
= VTablesUsed.find(VTables[I].Record);
// Even if a definition wasn't required before, it may be required now.
if (Pos != VTablesUsed.end()) {
if (!Pos->second && VTables[I].DefinitionRequired)
Pos->second = true;
continue;
}
VTablesUsed[VTables[I].Record] = VTables[I].DefinitionRequired;
NewUses.push_back(VTableUse(VTables[I].Record, VTables[I].Location));
}
VTableUses.insert(VTableUses.begin(), NewUses.begin(), NewUses.end());
}
void Sema::MarkVTableUsed(SourceLocation Loc, CXXRecordDecl *Class,
bool DefinitionRequired) {
// Ignore any vtable uses in unevaluated operands or for classes that do
// not have a vtable.
if (!Class->isDynamicClass() || Class->isDependentContext() ||
CurContext->isDependentContext() || isUnevaluatedContext())
return;
// Try to insert this class into the map.
LoadExternalVTableUses();
Class = cast<CXXRecordDecl>(Class->getCanonicalDecl());
std::pair<llvm::DenseMap<CXXRecordDecl *, bool>::iterator, bool>
Pos = VTablesUsed.insert(std::make_pair(Class, DefinitionRequired));
if (!Pos.second) {
// If we already had an entry, check to see if we are promoting this vtable
// to require a definition. If so, we need to reappend to the VTableUses
// list, since we may have already processed the first entry.
if (DefinitionRequired && !Pos.first->second) {
Pos.first->second = true;
} else {
// Otherwise, we can early exit.
return;
}
} else {
// The Microsoft ABI requires that we perform the destructor body
// checks (i.e. operator delete() lookup) when the vtable is marked used, as
// the deleting destructor is emitted with the vtable, not with the
// destructor definition as in the Itanium ABI.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
CXXDestructorDecl *DD = Class->getDestructor();
if (DD && DD->isVirtual() && !DD->isDeleted()) {
if (Class->hasUserDeclaredDestructor() && !DD->isDefined()) {
// If this is an out-of-line declaration, marking it referenced will
// not do anything. Manually call CheckDestructor to look up operator
// delete().
ContextRAII SavedContext(*this, DD);
CheckDestructor(DD);
} else {
MarkFunctionReferenced(Loc, Class->getDestructor());
}
}
}
}
// Local classes need to have their virtual members marked
// immediately. For all other classes, we mark their virtual members
// at the end of the translation unit.
if (Class->isLocalClass())
MarkVirtualMembersReferenced(Loc, Class);
else
VTableUses.push_back(std::make_pair(Class, Loc));
}
bool Sema::DefineUsedVTables() {
LoadExternalVTableUses();
if (VTableUses.empty())
return false;
// Note: The VTableUses vector could grow as a result of marking
// the members of a class as "used", so we check the size each
// time through the loop and prefer indices (which are stable) to
// iterators (which are not).
bool DefinedAnything = false;
for (unsigned I = 0; I != VTableUses.size(); ++I) {
CXXRecordDecl *Class = VTableUses[I].first->getDefinition();
if (!Class)
continue;
TemplateSpecializationKind ClassTSK =
Class->getTemplateSpecializationKind();
SourceLocation Loc = VTableUses[I].second;
bool DefineVTable = true;
// If this class has a key function, but that key function is
// defined in another translation unit, we don't need to emit the
// vtable even though we're using it.
const CXXMethodDecl *KeyFunction = Context.getCurrentKeyFunction(Class);
if (KeyFunction && !KeyFunction->hasBody()) {
// The key function is in another translation unit.
DefineVTable = false;
TemplateSpecializationKind TSK =
KeyFunction->getTemplateSpecializationKind();
assert(TSK != TSK_ExplicitInstantiationDefinition &&
TSK != TSK_ImplicitInstantiation &&
"Instantiations don't have key functions");
(void)TSK;
} else if (!KeyFunction) {
// If we have a class with no key function that is the subject
// of an explicit instantiation declaration, suppress the
// vtable; it will live with the explicit instantiation
// definition.
bool IsExplicitInstantiationDeclaration =
ClassTSK == TSK_ExplicitInstantiationDeclaration;
for (auto R : Class->redecls()) {
TemplateSpecializationKind TSK
= cast<CXXRecordDecl>(R)->getTemplateSpecializationKind();
if (TSK == TSK_ExplicitInstantiationDeclaration)
IsExplicitInstantiationDeclaration = true;
else if (TSK == TSK_ExplicitInstantiationDefinition) {
IsExplicitInstantiationDeclaration = false;
break;
}
}
if (IsExplicitInstantiationDeclaration)
DefineVTable = false;
}
// The exception specifications for all virtual members may be needed even
// if we are not providing an authoritative form of the vtable in this TU.
// We may choose to emit it available_externally anyway.
if (!DefineVTable) {
MarkVirtualMemberExceptionSpecsNeeded(Loc, Class);
continue;
}
// Mark all of the virtual members of this class as referenced, so
// that we can build a vtable. Then, tell the AST consumer that a
// vtable for this class is required.
DefinedAnything = true;
MarkVirtualMembersReferenced(Loc, Class);
CXXRecordDecl *Canonical = cast<CXXRecordDecl>(Class->getCanonicalDecl());
if (VTablesUsed[Canonical])
Consumer.HandleVTable(Class);
// Warn if we're emitting a weak vtable. The vtable will be weak if there is
// no key function or the key function is inlined. Don't warn in C++ ABIs
// that lack key functions, since the user won't be able to make one.
if (Context.getTargetInfo().getCXXABI().hasKeyFunctions() &&
Class->isExternallyVisible() && ClassTSK != TSK_ImplicitInstantiation) {
const FunctionDecl *KeyFunctionDef = nullptr;
if (!KeyFunction || (KeyFunction->hasBody(KeyFunctionDef) &&
KeyFunctionDef->isInlined())) {
Diag(Class->getLocation(),
ClassTSK == TSK_ExplicitInstantiationDefinition
? diag::warn_weak_template_vtable
: diag::warn_weak_vtable)
<< Class;
}
}
}
VTableUses.clear();
return DefinedAnything;
}
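// A rough sketch of the key-function logic above (the names are
// illustrative):
//   struct NoKey { virtual void f() { } };  // every virtual is inline: the
//                                           // vtable is emitted weakly wherever
//                                           // it is used, and we warn about it
//   struct Key { virtual void f(); };       // the out-of-line definition of
//                                           // f() is the key function; the
//                                           // vtable lives in that TU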
void Sema::MarkVirtualMemberExceptionSpecsNeeded(SourceLocation Loc,
const CXXRecordDecl *RD) {
for (const auto *I : RD->methods())
if (I->isVirtual() && !I->isPure())
ResolveExceptionSpec(Loc, I->getType()->castAs<FunctionProtoType>());
}
void Sema::MarkVirtualMembersReferenced(SourceLocation Loc,
const CXXRecordDecl *RD) {
// Mark all functions which will appear in RD's vtable as used.
CXXFinalOverriderMap FinalOverriders;
RD->getFinalOverriders(FinalOverriders);
for (CXXFinalOverriderMap::const_iterator I = FinalOverriders.begin(),
E = FinalOverriders.end();
I != E; ++I) {
for (OverridingMethods::const_iterator OI = I->second.begin(),
OE = I->second.end();
OI != OE; ++OI) {
assert(OI->second.size() > 0 && "no final overrider");
CXXMethodDecl *Overrider = OI->second.front().Method;
// C++ [basic.def.odr]p2:
// [...] A virtual member function is used if it is not pure. [...]
if (!Overrider->isPure())
MarkFunctionReferenced(Loc, Overrider);
}
}
// Only classes that have virtual bases need a VTT.
if (RD->getNumVBases() == 0)
return;
for (const auto &I : RD->bases()) {
const CXXRecordDecl *Base =
cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
if (Base->getNumVBases() == 0)
continue;
MarkVirtualMembersReferenced(Loc, Base);
}
}
/// SetIvarInitializers - This routine builds initialization ASTs for the
/// Objective-C implementation whose ivars need to be initialized.
void Sema::SetIvarInitializers(ObjCImplementationDecl *ObjCImplementation) {
if (!getLangOpts().CPlusPlus)
return;
if (ObjCInterfaceDecl *OID = ObjCImplementation->getClassInterface()) {
SmallVector<ObjCIvarDecl*, 8> ivars;
CollectIvarsToConstructOrDestruct(OID, ivars);
if (ivars.empty())
return;
SmallVector<CXXCtorInitializer*, 32> AllToInit;
for (unsigned i = 0; i < ivars.size(); i++) {
FieldDecl *Field = ivars[i];
if (Field->isInvalidDecl())
continue;
CXXCtorInitializer *Member;
InitializedEntity InitEntity = InitializedEntity::InitializeMember(Field);
InitializationKind InitKind =
InitializationKind::CreateDefault(ObjCImplementation->getLocation());
InitializationSequence InitSeq(*this, InitEntity, InitKind, None);
ExprResult MemberInit =
InitSeq.Perform(*this, InitEntity, InitKind, None);
MemberInit = MaybeCreateExprWithCleanups(MemberInit);
// Note that MemberInit could actually come back empty if no initialization
// is required (e.g., because it would call a trivial default constructor).
if (!MemberInit.get() || MemberInit.isInvalid())
continue;
Member =
new (Context) CXXCtorInitializer(Context, Field, SourceLocation(),
SourceLocation(),
MemberInit.getAs<Expr>(),
SourceLocation());
AllToInit.push_back(Member);
// Be sure that the destructor is accessible and is marked as referenced.
if (const RecordType *RecordTy =
Context.getBaseElementType(Field->getType())
->getAs<RecordType>()) {
CXXRecordDecl *RD = cast<CXXRecordDecl>(RecordTy->getDecl());
if (CXXDestructorDecl *Destructor = LookupDestructor(RD)) {
MarkFunctionReferenced(Field->getLocation(), Destructor);
CheckDestructorAccess(Field->getLocation(), Destructor,
PDiag(diag::err_access_dtor_ivar)
<< Context.getBaseElementType(Field->getType()));
}
}
}
ObjCImplementation->setIvarInitializers(Context,
AllToInit.data(), AllToInit.size());
}
}
static
void DelegatingCycleHelper(CXXConstructorDecl* Ctor,
llvm::SmallSet<CXXConstructorDecl*, 4> &Valid,
llvm::SmallSet<CXXConstructorDecl*, 4> &Invalid,
llvm::SmallSet<CXXConstructorDecl*, 4> &Current,
Sema &S) {
if (Ctor->isInvalidDecl())
return;
CXXConstructorDecl *Target = Ctor->getTargetConstructor();
// Target may not be determinable yet, for instance if this is a dependent
// call in an uninstantiated template.
if (Target) {
const FunctionDecl *FNTarget = nullptr;
(void)Target->hasBody(FNTarget);
Target = const_cast<CXXConstructorDecl*>(
cast_or_null<CXXConstructorDecl>(FNTarget));
}
CXXConstructorDecl *Canonical = Ctor->getCanonicalDecl(),
// Avoid dereferencing a null pointer here.
*TCanonical = Target? Target->getCanonicalDecl() : nullptr;
if (!Current.insert(Canonical).second)
return;
// We know that beyond here, we aren't chaining into a cycle.
if (!Target || !Target->isDelegatingConstructor() ||
Target->isInvalidDecl() || Valid.count(TCanonical)) {
Valid.insert(Current.begin(), Current.end());
Current.clear();
// We've hit a cycle.
} else if (TCanonical == Canonical || Invalid.count(TCanonical) ||
Current.count(TCanonical)) {
// If we haven't diagnosed this cycle yet, do so now.
if (!Invalid.count(TCanonical)) {
S.Diag((*Ctor->init_begin())->getSourceLocation(),
diag::warn_delegating_ctor_cycle)
<< Ctor;
// Don't add a note for a function delegating directly to itself.
if (TCanonical != Canonical)
S.Diag(Target->getLocation(), diag::note_it_delegates_to);
CXXConstructorDecl *C = Target;
while (C->getCanonicalDecl() != Canonical) {
const FunctionDecl *FNTarget = nullptr;
(void)C->getTargetConstructor()->hasBody(FNTarget);
assert(FNTarget && "Ctor cycle through bodiless function");
C = const_cast<CXXConstructorDecl*>(
cast<CXXConstructorDecl>(FNTarget));
S.Diag(C->getLocation(), diag::note_which_delegates_to);
}
}
Invalid.insert(Current.begin(), Current.end());
Current.clear();
} else {
DelegatingCycleHelper(Target, Valid, Invalid, Current, S);
}
}
void Sema::CheckDelegatingCtorCycles() {
llvm::SmallSet<CXXConstructorDecl*, 4> Valid, Invalid, Current;
for (DelegatingCtorDeclsType::iterator
I = DelegatingCtorDecls.begin(ExternalSource),
E = DelegatingCtorDecls.end();
I != E; ++I)
DelegatingCycleHelper(*I, Valid, Invalid, Current, *this);
for (llvm::SmallSet<CXXConstructorDecl *, 4>::iterator CI = Invalid.begin(),
CE = Invalid.end();
CI != CE; ++CI)
(*CI)->setInvalidDecl();
}
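// A minimal sketch of a diagnosed delegation cycle (the names are
// illustrative):
//   struct S {
//     S() : S(0) { }    // delegates to S(int)...
//     S(int) : S() { }  // ...which delegates back: the cycle is warned about
//   };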
namespace {
/// \brief AST visitor that finds references to the 'this' expression.
class FindCXXThisExpr : public RecursiveASTVisitor<FindCXXThisExpr> {
Sema &S;
public:
explicit FindCXXThisExpr(Sema &S) : S(S) { }
bool VisitCXXThisExpr(CXXThisExpr *E) {
S.Diag(E->getLocation(), diag::err_this_static_member_func)
<< E->isImplicit();
return false;
}
};
}
bool Sema::checkThisInStaticMemberFunctionType(CXXMethodDecl *Method) {
TypeSourceInfo *TSInfo = Method->getTypeSourceInfo();
if (!TSInfo)
return false;
TypeLoc TL = TSInfo->getTypeLoc();
FunctionProtoTypeLoc ProtoTL = TL.getAs<FunctionProtoTypeLoc>();
if (!ProtoTL)
return false;
// C++11 [expr.prim.general]p3:
// [The expression this] shall not appear before the optional
// cv-qualifier-seq and it shall not appear within the declaration of a
// static member function (although its type and value category are defined
// within a static member function as they are within a non-static member
// function). [ Note: this is because declaration matching does not occur
// until the complete declarator is known. - end note ]
const FunctionProtoType *Proto = ProtoTL.getTypePtr();
FindCXXThisExpr Finder(*this);
// If the return type came after the cv-qualifier-seq, check it now.
if (Proto->hasTrailingReturn() &&
!Finder.TraverseTypeLoc(ProtoTL.getReturnLoc()))
return true;
// Check the exception specification.
if (checkThisInStaticMemberFunctionExceptionSpec(Method))
return true;
return checkThisInStaticMemberFunctionAttributes(Method);
}
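// A small sketch of the rejected use of 'this' (the names are illustrative):
//   struct S {
//     static auto f() -> decltype(this);  // error: 'this' cannot appear in
//   };                                    // the declaration of a static
//                                         // member function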
bool Sema::checkThisInStaticMemberFunctionExceptionSpec(CXXMethodDecl *Method) {
TypeSourceInfo *TSInfo = Method->getTypeSourceInfo();
if (!TSInfo)
return false;
TypeLoc TL = TSInfo->getTypeLoc();
FunctionProtoTypeLoc ProtoTL = TL.getAs<FunctionProtoTypeLoc>();
if (!ProtoTL)
return false;
const FunctionProtoType *Proto = ProtoTL.getTypePtr();
FindCXXThisExpr Finder(*this);
switch (Proto->getExceptionSpecType()) {
case EST_Unparsed:
case EST_Uninstantiated:
case EST_Unevaluated:
case EST_BasicNoexcept:
case EST_DynamicNone:
case EST_MSAny:
case EST_None:
break;
case EST_ComputedNoexcept:
if (!Finder.TraverseStmt(Proto->getNoexceptExpr()))
return true;
LLVM_FALLTHROUGH;
case EST_Dynamic:
for (const auto &E : Proto->exceptions()) {
if (!Finder.TraverseType(E))
return true;
}
break;
}
return false;
}
bool Sema::checkThisInStaticMemberFunctionAttributes(CXXMethodDecl *Method) {
FindCXXThisExpr Finder(*this);
// Check attributes.
for (const auto *A : Method->attrs()) {
// FIXME: This should be emitted by tblgen.
Expr *Arg = nullptr;
ArrayRef<Expr *> Args;
if (const auto *G = dyn_cast<GuardedByAttr>(A))
Arg = G->getArg();
else if (const auto *G = dyn_cast<PtGuardedByAttr>(A))
Arg = G->getArg();
else if (const auto *AA = dyn_cast<AcquiredAfterAttr>(A))
Args = llvm::makeArrayRef(AA->args_begin(), AA->args_size());
else if (const auto *AB = dyn_cast<AcquiredBeforeAttr>(A))
Args = llvm::makeArrayRef(AB->args_begin(), AB->args_size());
else if (const auto *ETLF = dyn_cast<ExclusiveTrylockFunctionAttr>(A)) {
Arg = ETLF->getSuccessValue();
Args = llvm::makeArrayRef(ETLF->args_begin(), ETLF->args_size());
} else if (const auto *STLF = dyn_cast<SharedTrylockFunctionAttr>(A)) {
Arg = STLF->getSuccessValue();
Args = llvm::makeArrayRef(STLF->args_begin(), STLF->args_size());
} else if (const auto *LR = dyn_cast<LockReturnedAttr>(A))
Arg = LR->getArg();
else if (const auto *LE = dyn_cast<LocksExcludedAttr>(A))
Args = llvm::makeArrayRef(LE->args_begin(), LE->args_size());
else if (const auto *RC = dyn_cast<RequiresCapabilityAttr>(A))
Args = llvm::makeArrayRef(RC->args_begin(), RC->args_size());
else if (const auto *AC = dyn_cast<AcquireCapabilityAttr>(A))
Args = llvm::makeArrayRef(AC->args_begin(), AC->args_size());
else if (const auto *AC = dyn_cast<TryAcquireCapabilityAttr>(A))
Args = llvm::makeArrayRef(AC->args_begin(), AC->args_size());
else if (const auto *RC = dyn_cast<ReleaseCapabilityAttr>(A))
Args = llvm::makeArrayRef(RC->args_begin(), RC->args_size());
if (Arg && !Finder.TraverseStmt(Arg))
return true;
for (unsigned I = 0, N = Args.size(); I != N; ++I) {
if (!Finder.TraverseStmt(Args[I]))
return true;
}
}
return false;
}
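// Illustrative (hypothetical) exception specifications normalized below:
//   void f() throw(int, float);           // EST_Dynamic
//   void g() noexcept(sizeof(int) == 4);  // EST_ComputedNoexcept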
void Sema::checkExceptionSpecification(
bool IsTopLevel, ExceptionSpecificationType EST,
ArrayRef<ParsedType> DynamicExceptions,
ArrayRef<SourceRange> DynamicExceptionRanges, Expr *NoexceptExpr,
SmallVectorImpl<QualType> &Exceptions,
FunctionProtoType::ExceptionSpecInfo &ESI) {
Exceptions.clear();
ESI.Type = EST;
if (EST == EST_Dynamic) {
Exceptions.reserve(DynamicExceptions.size());
for (unsigned ei = 0, ee = DynamicExceptions.size(); ei != ee; ++ei) {
// FIXME: Preserve type source info.
QualType ET = GetTypeFromParser(DynamicExceptions[ei]);
if (IsTopLevel) {
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
collectUnexpandedParameterPacks(ET, Unexpanded);
if (!Unexpanded.empty()) {
DiagnoseUnexpandedParameterPacks(
DynamicExceptionRanges[ei].getBegin(), UPPC_ExceptionType,
Unexpanded);
continue;
}
}
// Check that the type is valid for an exception spec, and
// drop it if not.
if (!CheckSpecifiedExceptionType(ET, DynamicExceptionRanges[ei]))
Exceptions.push_back(ET);
}
ESI.Exceptions = Exceptions;
return;
}
if (EST == EST_ComputedNoexcept) {
// If an error occurred, there's no expression here.
if (NoexceptExpr) {
assert((NoexceptExpr->isTypeDependent() ||
NoexceptExpr->getType()->getCanonicalTypeUnqualified() ==
Context.BoolTy) &&
"Parser should have made sure that the expression is boolean");
if (IsTopLevel && NoexceptExpr &&
DiagnoseUnexpandedParameterPack(NoexceptExpr)) {
ESI.Type = EST_BasicNoexcept;
return;
}
if (!NoexceptExpr->isValueDependent())
NoexceptExpr = VerifyIntegerConstantExpression(NoexceptExpr, nullptr,
diag::err_noexcept_needs_constant_expression,
/*AllowFold*/ false).get();
ESI.NoexceptExpr = NoexceptExpr;
}
return;
}
}
void Sema::actOnDelayedExceptionSpecification(Decl *MethodD,
ExceptionSpecificationType EST,
SourceRange SpecificationRange,
ArrayRef<ParsedType> DynamicExceptions,
ArrayRef<SourceRange> DynamicExceptionRanges,
Expr *NoexceptExpr) {
if (!MethodD)
return;
// Dig out the method we're referring to.
if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(MethodD))
MethodD = FunTmpl->getTemplatedDecl();
CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(MethodD);
if (!Method)
return;
// Check the exception specification.
llvm::SmallVector<QualType, 4> Exceptions;
FunctionProtoType::ExceptionSpecInfo ESI;
checkExceptionSpecification(/*IsTopLevel*/true, EST, DynamicExceptions,
DynamicExceptionRanges, NoexceptExpr, Exceptions,
ESI);
// Update the exception specification on the function type.
Context.adjustExceptionSpec(Method, ESI, /*AsWritten*/true);
if (Method->isStatic())
checkThisInStaticMemberFunctionExceptionSpec(Method);
if (Method->isVirtual()) {
// Check overrides, which we previously had to delay.
for (CXXMethodDecl::method_iterator O = Method->begin_overridden_methods(),
OEnd = Method->end_overridden_methods();
O != OEnd; ++O)
CheckOverridingFunctionExceptionSpec(Method, *O);
}
}
/// HandleMSProperty - Analyze a __declspec(property) field of a C++ class.
///
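// A typical use of the extension (hypothetical names):
//   struct S {
//     __declspec(property(get = GetX, put = PutX)) int x;
//     int GetX();
//     void PutX(int v);
//   };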
MSPropertyDecl *Sema::HandleMSProperty(Scope *S, RecordDecl *Record,
SourceLocation DeclStart,
Declarator &D, Expr *BitWidth,
InClassInitStyle InitStyle,
AccessSpecifier AS,
AttributeList *MSPropertyAttr) {
IdentifierInfo *II = D.getIdentifier();
if (!II) {
Diag(DeclStart, diag::err_anonymous_property);
return nullptr;
}
SourceLocation Loc = D.getIdentifierLoc();
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType T = TInfo->getType();
if (getLangOpts().CPlusPlus) {
CheckExtraCXXDefaultArguments(D);
if (DiagnoseUnexpandedParameterPack(D.getIdentifierLoc(), TInfo,
UPPC_DataMemberType)) {
D.setInvalidType();
T = Context.IntTy;
TInfo = Context.getTrivialTypeSourceInfo(T, Loc);
}
}
DiagnoseFunctionSpecifiers(D.getDeclSpec());
if (D.getDeclSpec().isInlineSpecified())
Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
<< getLangOpts().CPlusPlus1z;
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
<< DeclSpec::getSpecifierName(TSCS);
// Check to see if this name was declared as a member previously
NamedDecl *PrevDecl = nullptr;
LookupResult Previous(*this, II, Loc, LookupMemberName, ForRedeclaration);
LookupName(Previous, S);
switch (Previous.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundUnresolvedValue:
PrevDecl = Previous.getAsSingle<NamedDecl>();
break;
case LookupResult::FoundOverloaded:
PrevDecl = Previous.getRepresentativeDecl();
break;
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
break;
}
if (PrevDecl && PrevDecl->isTemplateParameter()) {
// Maybe we will complain about the shadowed template parameter.
DiagnoseTemplateParameterShadow(D.getIdentifierLoc(), PrevDecl);
// Just pretend that we didn't see the previous declaration.
PrevDecl = nullptr;
}
if (PrevDecl && !isDeclInScope(PrevDecl, Record, S))
PrevDecl = nullptr;
SourceLocation TSSL = D.getLocStart();
const AttributeList::PropertyData &Data = MSPropertyAttr->getPropertyData();
MSPropertyDecl *NewPD = MSPropertyDecl::Create(
Context, Record, Loc, II, T, TInfo, TSSL, Data.GetterId, Data.SetterId);
ProcessDeclAttributes(TUScope, NewPD, D);
NewPD->setAccess(AS);
if (NewPD->isInvalidDecl())
Record->setInvalidDecl();
if (D.getDeclSpec().isModulePrivateSpecified())
NewPD->setModulePrivate();
if (NewPD->isInvalidDecl() && PrevDecl) {
// Don't introduce NewFD into scope; there's already something
// with the same name in the same scope.
} else if (II) {
PushOnScopeChains(NewPD, S);
} else
Record->addDecl(NewPD);
return NewPD;
}
Index: head/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp (revision 322855)
@@ -1,2681 +1,2681 @@
//===--- SemaObjCProperty.cpp - Semantic Analysis for ObjC @property ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for Objective C @property and
// @synthesize declarations.
//
//===----------------------------------------------------------------------===//
#include "clang/Sema/SemaInternal.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Initialization.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
using namespace clang;
//===----------------------------------------------------------------------===//
// Grammar actions.
//===----------------------------------------------------------------------===//
/// getImpliedARCOwnership - Given a set of property attributes and a
/// type, infer an expected lifetime. The type's ownership qualification
/// is not considered.
///
/// Returns OCL_None if the attributes as stated do not imply an ownership.
/// Never returns OCL_Autoreleasing.
static Qualifiers::ObjCLifetime getImpliedARCOwnership(
ObjCPropertyDecl::PropertyAttributeKind attrs,
QualType type) {
// retain, strong, copy, weak, and unsafe_unretained are only legal
// on properties of retainable pointer type.
if (attrs & (ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_strong |
ObjCPropertyDecl::OBJC_PR_copy)) {
return Qualifiers::OCL_Strong;
} else if (attrs & ObjCPropertyDecl::OBJC_PR_weak) {
return Qualifiers::OCL_Weak;
} else if (attrs & ObjCPropertyDecl::OBJC_PR_unsafe_unretained) {
return Qualifiers::OCL_ExplicitNone;
}
// assign can appear on other types, so we have to check the
// property type.
if (attrs & ObjCPropertyDecl::OBJC_PR_assign &&
type->isObjCRetainableType()) {
return Qualifiers::OCL_ExplicitNone;
}
return Qualifiers::OCL_None;
}
/// Check the internal consistency of a property declaration with
/// an explicit ownership qualifier.
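// e.g. (hypothetical) the attribute and the lifetime qualifier below
// disagree, so the property is diagnosed:
//   @property (strong) __weak id obj;   // error: inconsistent ownership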
static void checkPropertyDeclWithOwnership(Sema &S,
ObjCPropertyDecl *property) {
if (property->isInvalidDecl()) return;
ObjCPropertyDecl::PropertyAttributeKind propertyKind
= property->getPropertyAttributes();
Qualifiers::ObjCLifetime propertyLifetime
= property->getType().getObjCLifetime();
assert(propertyLifetime != Qualifiers::OCL_None);
Qualifiers::ObjCLifetime expectedLifetime
= getImpliedARCOwnership(propertyKind, property->getType());
if (!expectedLifetime) {
// We have a lifetime qualifier but no dominating property
// attribute. That's okay, but restore reasonable invariants by
// setting the property attribute according to the lifetime
// qualifier.
ObjCPropertyDecl::PropertyAttributeKind attr;
if (propertyLifetime == Qualifiers::OCL_Strong) {
attr = ObjCPropertyDecl::OBJC_PR_strong;
} else if (propertyLifetime == Qualifiers::OCL_Weak) {
attr = ObjCPropertyDecl::OBJC_PR_weak;
} else {
assert(propertyLifetime == Qualifiers::OCL_ExplicitNone);
attr = ObjCPropertyDecl::OBJC_PR_unsafe_unretained;
}
property->setPropertyAttributes(attr);
return;
}
if (propertyLifetime == expectedLifetime) return;
property->setInvalidDecl();
S.Diag(property->getLocation(),
diag::err_arc_inconsistent_property_ownership)
<< property->getDeclName()
<< expectedLifetime
<< propertyLifetime;
}
/// \brief Check this Objective-C property against a property declared in the
/// given protocol.
static void
CheckPropertyAgainstProtocol(Sema &S, ObjCPropertyDecl *Prop,
ObjCProtocolDecl *Proto,
llvm::SmallPtrSetImpl<ObjCProtocolDecl *> &Known) {
// Have we seen this protocol before?
if (!Known.insert(Proto).second)
return;
// Look for a property with the same name.
DeclContext::lookup_result R = Proto->lookup(Prop->getDeclName());
for (unsigned I = 0, N = R.size(); I != N; ++I) {
if (ObjCPropertyDecl *ProtoProp = dyn_cast<ObjCPropertyDecl>(R[I])) {
S.DiagnosePropertyMismatch(Prop, ProtoProp, Proto->getIdentifier(), true);
return;
}
}
// Check this property against any protocols we inherit.
for (auto *P : Proto->protocols())
CheckPropertyAgainstProtocol(S, Prop, P, Known);
}
static unsigned deducePropertyOwnershipFromType(Sema &S, QualType T) {
// In GC mode, just look for the __weak qualifier.
if (S.getLangOpts().getGC() != LangOptions::NonGC) {
if (T.isObjCGCWeak()) return ObjCDeclSpec::DQ_PR_weak;
// In ARC/MRC, look for an explicit ownership qualifier.
// For some reason, this only applies to __weak.
} else if (auto ownership = T.getObjCLifetime()) {
switch (ownership) {
case Qualifiers::OCL_Weak:
return ObjCDeclSpec::DQ_PR_weak;
case Qualifiers::OCL_Strong:
return ObjCDeclSpec::DQ_PR_strong;
case Qualifiers::OCL_ExplicitNone:
return ObjCDeclSpec::DQ_PR_unsafe_unretained;
case Qualifiers::OCL_Autoreleasing:
case Qualifiers::OCL_None:
return 0;
}
llvm_unreachable("bad qualifier");
}
return 0;
}
static const unsigned OwnershipMask =
(ObjCPropertyDecl::OBJC_PR_assign |
ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_copy |
ObjCPropertyDecl::OBJC_PR_weak |
ObjCPropertyDecl::OBJC_PR_strong |
ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
static unsigned getOwnershipRule(unsigned attr) {
unsigned result = attr & OwnershipMask;
// From an ownership perspective, assign and unsafe_unretained are
// identical; make sure one also implies the other.
if (result & (ObjCPropertyDecl::OBJC_PR_assign |
ObjCPropertyDecl::OBJC_PR_unsafe_unretained)) {
result |= ObjCPropertyDecl::OBJC_PR_assign |
ObjCPropertyDecl::OBJC_PR_unsafe_unretained;
}
return result;
}
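// A typical declaration handled here (illustrative):
//   @interface Foo
//   @property (nonatomic, copy) NSString *name;
//   @end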
Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
SourceLocation LParenLoc,
FieldDeclarator &FD,
ObjCDeclSpec &ODS,
Selector GetterSel,
Selector SetterSel,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC) {
unsigned Attributes = ODS.getPropertyAttributes();
FD.D.setObjCWeakProperty((Attributes & ObjCDeclSpec::DQ_PR_weak) != 0);
TypeSourceInfo *TSI = GetTypeForDeclarator(FD.D, S);
QualType T = TSI->getType();
if (!getOwnershipRule(Attributes)) {
Attributes |= deducePropertyOwnershipFromType(*this, T);
}
bool isReadWrite = ((Attributes & ObjCDeclSpec::DQ_PR_readwrite) ||
// default is readwrite!
!(Attributes & ObjCDeclSpec::DQ_PR_readonly));
// Proceed with constructing the ObjCPropertyDecls.
ObjCContainerDecl *ClassDecl = cast<ObjCContainerDecl>(CurContext);
ObjCPropertyDecl *Res = nullptr;
if (ObjCCategoryDecl *CDecl = dyn_cast<ObjCCategoryDecl>(ClassDecl)) {
if (CDecl->IsClassExtension()) {
Res = HandlePropertyInClassExtension(S, AtLoc, LParenLoc,
FD,
GetterSel, ODS.getGetterNameLoc(),
SetterSel, ODS.getSetterNameLoc(),
isReadWrite, Attributes,
ODS.getPropertyAttributes(),
T, TSI, MethodImplKind);
if (!Res)
return nullptr;
}
}
if (!Res) {
Res = CreatePropertyDecl(S, ClassDecl, AtLoc, LParenLoc, FD,
GetterSel, ODS.getGetterNameLoc(), SetterSel,
ODS.getSetterNameLoc(), isReadWrite, Attributes,
ODS.getPropertyAttributes(), T, TSI,
MethodImplKind);
if (lexicalDC)
Res->setLexicalDeclContext(lexicalDC);
}
// Validate the attributes on the @property.
CheckObjCPropertyAttributes(Res, AtLoc, Attributes,
(isa<ObjCInterfaceDecl>(ClassDecl) ||
isa<ObjCProtocolDecl>(ClassDecl)));
// Check consistency if the type has explicit ownership qualification.
if (Res->getType().getObjCLifetime())
checkPropertyDeclWithOwnership(*this, Res);
llvm::SmallPtrSet<ObjCProtocolDecl *, 16> KnownProtos;
if (ObjCInterfaceDecl *IFace = dyn_cast<ObjCInterfaceDecl>(ClassDecl)) {
// For a class, compare the property against a property in our superclass.
bool FoundInSuper = false;
ObjCInterfaceDecl *CurrentInterfaceDecl = IFace;
while (ObjCInterfaceDecl *Super = CurrentInterfaceDecl->getSuperClass()) {
DeclContext::lookup_result R = Super->lookup(Res->getDeclName());
for (unsigned I = 0, N = R.size(); I != N; ++I) {
if (ObjCPropertyDecl *SuperProp = dyn_cast<ObjCPropertyDecl>(R[I])) {
DiagnosePropertyMismatch(Res, SuperProp, Super->getIdentifier(), false);
FoundInSuper = true;
break;
}
}
if (FoundInSuper)
break;
else
CurrentInterfaceDecl = Super;
}
if (FoundInSuper) {
// Also compare the property against a property in our protocols.
for (auto *P : CurrentInterfaceDecl->protocols()) {
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
}
} else {
// Slower path: look in all protocols we referenced.
for (auto *P : IFace->all_referenced_protocols()) {
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
}
}
} else if (ObjCCategoryDecl *Cat = dyn_cast<ObjCCategoryDecl>(ClassDecl)) {
// We don't check class extensions, because properties in a class extension
// are meant to override some of the attributes, and that checking has
// already been done when the property in the class extension was constructed.
if (!Cat->IsClassExtension())
for (auto *P : Cat->protocols())
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
} else {
ObjCProtocolDecl *Proto = cast<ObjCProtocolDecl>(ClassDecl);
for (auto *P : Proto->protocols())
CheckPropertyAgainstProtocol(*this, Res, P, KnownProtos);
}
ActOnDocumentableDecl(Res);
return Res;
}
static ObjCPropertyDecl::PropertyAttributeKind
makePropertyAttributesAsWritten(unsigned Attributes) {
unsigned attributesAsWritten = 0;
if (Attributes & ObjCDeclSpec::DQ_PR_readonly)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_readonly;
if (Attributes & ObjCDeclSpec::DQ_PR_readwrite)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_readwrite;
if (Attributes & ObjCDeclSpec::DQ_PR_getter)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_getter;
if (Attributes & ObjCDeclSpec::DQ_PR_setter)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_setter;
if (Attributes & ObjCDeclSpec::DQ_PR_assign)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_assign;
if (Attributes & ObjCDeclSpec::DQ_PR_retain)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_retain;
if (Attributes & ObjCDeclSpec::DQ_PR_strong)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_strong;
if (Attributes & ObjCDeclSpec::DQ_PR_weak)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_weak;
if (Attributes & ObjCDeclSpec::DQ_PR_copy)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_copy;
if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_unsafe_unretained;
if (Attributes & ObjCDeclSpec::DQ_PR_nonatomic)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_nonatomic;
if (Attributes & ObjCDeclSpec::DQ_PR_atomic)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_atomic;
if (Attributes & ObjCDeclSpec::DQ_PR_class)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_class;
return (ObjCPropertyDecl::PropertyAttributeKind)attributesAsWritten;
}
static bool LocPropertyAttribute( ASTContext &Context, const char *attrName,
SourceLocation LParenLoc, SourceLocation &Loc) {
if (LParenLoc.isMacroID())
return false;
SourceManager &SM = Context.getSourceManager();
std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(LParenLoc);
// Try to load the file buffer.
bool invalidTemp = false;
StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
if (invalidTemp)
return false;
const char *tokenBegin = file.data() + locInfo.second;
// Lex from the start of the given location.
Lexer lexer(SM.getLocForStartOfFile(locInfo.first),
Context.getLangOpts(),
file.begin(), tokenBegin, file.end());
Token Tok;
do {
lexer.LexFromRawLexer(Tok);
if (Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == attrName) {
Loc = Tok.getLocation();
return true;
}
} while (Tok.isNot(tok::r_paren));
return false;
}
/// Check for a mismatch in the atomicity of the given properties.
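// e.g. (hypothetical) the class-extension redeclaration below explicitly
// contradicts the primary declaration's atomicity and is diagnosed:
//   @property (atomic, readwrite) id x;      // primary class
//   @property (nonatomic, readwrite) id x;   // class extension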
static void checkAtomicPropertyMismatch(Sema &S,
ObjCPropertyDecl *OldProperty,
ObjCPropertyDecl *NewProperty,
bool PropagateAtomicity) {
// If the atomicity of both matches, we're done.
bool OldIsAtomic =
(OldProperty->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_nonatomic)
== 0;
bool NewIsAtomic =
(NewProperty->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_nonatomic)
== 0;
if (OldIsAtomic == NewIsAtomic) return;
// Determine whether the given property is readonly and implicitly
// atomic.
auto isImplicitlyReadonlyAtomic = [](ObjCPropertyDecl *Property) -> bool {
// Is it readonly?
auto Attrs = Property->getPropertyAttributes();
if ((Attrs & ObjCPropertyDecl::OBJC_PR_readonly) == 0) return false;
// Is it nonatomic?
if (Attrs & ObjCPropertyDecl::OBJC_PR_nonatomic) return false;
// Was 'atomic' specified directly?
if (Property->getPropertyAttributesAsWritten() &
ObjCPropertyDecl::OBJC_PR_atomic)
return false;
return true;
};
// If we're allowed to propagate atomicity, and the new property did
// not specify atomicity at all, propagate.
const unsigned AtomicityMask =
(ObjCPropertyDecl::OBJC_PR_atomic | ObjCPropertyDecl::OBJC_PR_nonatomic);
if (PropagateAtomicity &&
((NewProperty->getPropertyAttributesAsWritten() & AtomicityMask) == 0)) {
unsigned Attrs = NewProperty->getPropertyAttributes();
Attrs = Attrs & ~AtomicityMask;
if (OldIsAtomic)
Attrs |= ObjCPropertyDecl::OBJC_PR_atomic;
else
Attrs |= ObjCPropertyDecl::OBJC_PR_nonatomic;
NewProperty->overwritePropertyAttributes(Attrs);
return;
}
// One of the properties is atomic; if it's a readonly property, and
// 'atomic' wasn't explicitly specified, we're okay.
if ((OldIsAtomic && isImplicitlyReadonlyAtomic(OldProperty)) ||
(NewIsAtomic && isImplicitlyReadonlyAtomic(NewProperty)))
return;
// Diagnose the conflict.
const IdentifierInfo *OldContextName;
auto *OldDC = OldProperty->getDeclContext();
if (auto Category = dyn_cast<ObjCCategoryDecl>(OldDC))
OldContextName = Category->getClassInterface()->getIdentifier();
else
OldContextName = cast<ObjCContainerDecl>(OldDC)->getIdentifier();
S.Diag(NewProperty->getLocation(), diag::warn_property_attribute)
<< NewProperty->getDeclName() << "atomic"
<< OldContextName;
S.Diag(OldProperty->getLocation(), diag::note_property_declare);
}
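// A class extension may refine a 'readonly' property to 'readwrite'
// (illustrative sketch):
//   @interface I
//   @property (readonly) id x;
//   @end
//   @interface I ()                  // class extension
//   @property (readwrite) id x;      // OK: refines the primary declaration
//   @end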
ObjCPropertyDecl *
Sema::HandlePropertyInClassExtension(Scope *S,
SourceLocation AtLoc,
SourceLocation LParenLoc,
FieldDeclarator &FD,
Selector GetterSel,
SourceLocation GetterNameLoc,
Selector SetterSel,
SourceLocation SetterNameLoc,
const bool isReadWrite,
unsigned &Attributes,
const unsigned AttributesAsWritten,
QualType T,
TypeSourceInfo *TSI,
tok::ObjCKeywordKind MethodImplKind) {
ObjCCategoryDecl *CDecl = cast<ObjCCategoryDecl>(CurContext);
// Diagnose if this property is already in the continuation class.
DeclContext *DC = CurContext;
IdentifierInfo *PropertyId = FD.D.getIdentifier();
ObjCInterfaceDecl *CCPrimary = CDecl->getClassInterface();
// We need to look in the @interface to see if the @property was
// already declared.
if (!CCPrimary) {
Diag(CDecl->getLocation(), diag::err_continuation_class);
return nullptr;
}
bool isClassProperty = (AttributesAsWritten & ObjCDeclSpec::DQ_PR_class) ||
(Attributes & ObjCDeclSpec::DQ_PR_class);
// Find the property in the extended class's primary class or
// extensions.
ObjCPropertyDecl *PIDecl = CCPrimary->FindPropertyVisibleInPrimaryClass(
PropertyId, ObjCPropertyDecl::getQueryKind(isClassProperty));
// If we found a property in an extension, complain.
if (PIDecl && isa<ObjCCategoryDecl>(PIDecl->getDeclContext())) {
Diag(AtLoc, diag::err_duplicate_property);
Diag(PIDecl->getLocation(), diag::note_property_declare);
return nullptr;
}
// Check for consistency with the previous declaration, if there is one.
if (PIDecl) {
// A readonly property declared in the primary class can be refined
// by adding a readwrite property within an extension.
// Anything else is an error.
if (!(PIDecl->isReadOnly() && isReadWrite)) {
// Tailor the diagnostics for the common case where a readwrite
// property is declared both in the @interface and the continuation.
// This is a common error where the user often intended the original
// declaration to be readonly.
unsigned diag =
(Attributes & ObjCDeclSpec::DQ_PR_readwrite) &&
(PIDecl->getPropertyAttributesAsWritten() &
ObjCPropertyDecl::OBJC_PR_readwrite)
? diag::err_use_continuation_class_redeclaration_readwrite
: diag::err_use_continuation_class;
Diag(AtLoc, diag)
<< CCPrimary->getDeclName();
Diag(PIDecl->getLocation(), diag::note_property_declare);
return nullptr;
}
// Check for consistency of getters.
if (PIDecl->getGetterName() != GetterSel) {
// If the getter was written explicitly, complain.
if (AttributesAsWritten & ObjCDeclSpec::DQ_PR_getter) {
Diag(AtLoc, diag::warn_property_redecl_getter_mismatch)
<< PIDecl->getGetterName() << GetterSel;
Diag(PIDecl->getLocation(), diag::note_property_declare);
}
// Always adopt the getter from the original declaration.
GetterSel = PIDecl->getGetterName();
Attributes |= ObjCDeclSpec::DQ_PR_getter;
}
// Check consistency of ownership.
unsigned ExistingOwnership
= getOwnershipRule(PIDecl->getPropertyAttributes());
unsigned NewOwnership = getOwnershipRule(Attributes);
if (ExistingOwnership && NewOwnership != ExistingOwnership) {
// If the ownership was written explicitly, complain.
if (getOwnershipRule(AttributesAsWritten)) {
Diag(AtLoc, diag::warn_property_attr_mismatch);
Diag(PIDecl->getLocation(), diag::note_property_declare);
}
// Take the ownership from the original property.
Attributes = (Attributes & ~OwnershipMask) | ExistingOwnership;
}
// If the redeclaration is 'weak' but the original property is not,
// warn that the ownership is implicitly mismatched.
if ((Attributes & ObjCPropertyDecl::OBJC_PR_weak) &&
!(PIDecl->getPropertyAttributesAsWritten()
& ObjCPropertyDecl::OBJC_PR_weak) &&
PIDecl->getType()->getAs<ObjCObjectPointerType>() &&
PIDecl->getType().getObjCLifetime() == Qualifiers::OCL_None) {
Diag(AtLoc, diag::warn_property_implicitly_mismatched);
Diag(PIDecl->getLocation(), diag::note_property_declare);
}
}
// Create a new ObjCPropertyDecl with the DeclContext being
// the class extension.
ObjCPropertyDecl *PDecl = CreatePropertyDecl(S, CDecl, AtLoc, LParenLoc,
FD, GetterSel, GetterNameLoc,
SetterSel, SetterNameLoc,
isReadWrite,
Attributes, AttributesAsWritten,
T, TSI, MethodImplKind, DC);
// If there was no declaration of a property with the same name in
// the primary class, we're done.
if (!PIDecl) {
ProcessPropertyDecl(PDecl);
return PDecl;
}
if (!Context.hasSameType(PIDecl->getType(), PDecl->getType())) {
bool IncompatibleObjC = false;
QualType ConvertedType;
// Relax the strict type matching for property type in continuation class.
// Allow property object type of continuation class to be different as long
// as it narrows the object type in its primary class property. Note that
// this conversion is safe only because the wider type is for a 'readonly'
// property in primary class and 'narrowed' type for a 'readwrite' property
// in continuation class.
QualType PrimaryClassPropertyT = Context.getCanonicalType(PIDecl->getType());
QualType ClassExtPropertyT = Context.getCanonicalType(PDecl->getType());
if (!isa<ObjCObjectPointerType>(PrimaryClassPropertyT) ||
!isa<ObjCObjectPointerType>(ClassExtPropertyT) ||
(!isObjCPointerConversion(ClassExtPropertyT, PrimaryClassPropertyT,
ConvertedType, IncompatibleObjC))
|| IncompatibleObjC) {
Diag(AtLoc,
diag::err_type_mismatch_continuation_class) << PDecl->getType();
Diag(PIDecl->getLocation(), diag::note_property_declare);
return nullptr;
}
}
// Check that atomicity of property in class extension matches the previous
// declaration.
checkAtomicPropertyMismatch(*this, PIDecl, PDecl, true);
// Make sure getter/setter are appropriately synthesized.
ProcessPropertyDecl(PDecl);
return PDecl;
}
ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
ObjCContainerDecl *CDecl,
SourceLocation AtLoc,
SourceLocation LParenLoc,
FieldDeclarator &FD,
Selector GetterSel,
SourceLocation GetterNameLoc,
Selector SetterSel,
SourceLocation SetterNameLoc,
const bool isReadWrite,
const unsigned Attributes,
const unsigned AttributesAsWritten,
QualType T,
TypeSourceInfo *TInfo,
tok::ObjCKeywordKind MethodImplKind,
DeclContext *lexicalDC){
IdentifierInfo *PropertyId = FD.D.getIdentifier();
// Property defaults to 'assign' if it is readwrite, unless this is ARC
// and the type is retainable.
bool isAssign;
if (Attributes & (ObjCDeclSpec::DQ_PR_assign |
ObjCDeclSpec::DQ_PR_unsafe_unretained)) {
isAssign = true;
} else if (getOwnershipRule(Attributes) || !isReadWrite) {
isAssign = false;
} else {
isAssign = (!getLangOpts().ObjCAutoRefCount ||
!T->isObjCRetainableType());
}
// Issue a warning if the property defaults to 'assign' and its object,
// which is GC'able, conforms to the NSCopying protocol.
if (getLangOpts().getGC() != LangOptions::NonGC &&
isAssign && !(Attributes & ObjCDeclSpec::DQ_PR_assign)) {
if (const ObjCObjectPointerType *ObjPtrTy =
T->getAs<ObjCObjectPointerType>()) {
ObjCInterfaceDecl *IDecl = ObjPtrTy->getObjectType()->getInterface();
if (IDecl)
if (ObjCProtocolDecl* PNSCopying =
LookupProtocol(&Context.Idents.get("NSCopying"), AtLoc))
if (IDecl->ClassImplementsProtocol(PNSCopying, true))
Diag(AtLoc, diag::warn_implements_nscopying) << PropertyId;
}
}
if (T->isObjCObjectType()) {
SourceLocation StarLoc = TInfo->getTypeLoc().getLocEnd();
StarLoc = getLocForEndOfToken(StarLoc);
Diag(FD.D.getIdentifierLoc(), diag::err_statically_allocated_object)
<< FixItHint::CreateInsertion(StarLoc, "*");
T = Context.getObjCObjectPointerType(T);
SourceLocation TLoc = TInfo->getTypeLoc().getLocStart();
TInfo = Context.getTrivialTypeSourceInfo(T, TLoc);
}
DeclContext *DC = cast<DeclContext>(CDecl);
ObjCPropertyDecl *PDecl = ObjCPropertyDecl::Create(Context, DC,
FD.D.getIdentifierLoc(),
PropertyId, AtLoc,
LParenLoc, T, TInfo);
bool isClassProperty = (AttributesAsWritten & ObjCDeclSpec::DQ_PR_class) ||
(Attributes & ObjCDeclSpec::DQ_PR_class);
// Class property and instance property can have the same name.
if (ObjCPropertyDecl *prevDecl = ObjCPropertyDecl::findPropertyDecl(
DC, PropertyId, ObjCPropertyDecl::getQueryKind(isClassProperty))) {
Diag(PDecl->getLocation(), diag::err_duplicate_property);
Diag(prevDecl->getLocation(), diag::note_property_declare);
PDecl->setInvalidDecl();
}
else {
DC->addDecl(PDecl);
if (lexicalDC)
PDecl->setLexicalDeclContext(lexicalDC);
}
if (T->isArrayType() || T->isFunctionType()) {
Diag(AtLoc, diag::err_property_type) << T;
PDecl->setInvalidDecl();
}
ProcessDeclAttributes(S, PDecl, FD.D);
// Regardless of setter/getter attribute, we save the default getter/setter
// selector names in anticipation of declaration of setter/getter methods.
PDecl->setGetterName(GetterSel, GetterNameLoc);
PDecl->setSetterName(SetterSel, SetterNameLoc);
PDecl->setPropertyAttributesAsWritten(
makePropertyAttributesAsWritten(AttributesAsWritten));
if (Attributes & ObjCDeclSpec::DQ_PR_readonly)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_readonly);
if (Attributes & ObjCDeclSpec::DQ_PR_getter)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_getter);
if (Attributes & ObjCDeclSpec::DQ_PR_setter)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_setter);
if (isReadWrite)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_readwrite);
if (Attributes & ObjCDeclSpec::DQ_PR_retain)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_retain);
if (Attributes & ObjCDeclSpec::DQ_PR_strong)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
if (Attributes & ObjCDeclSpec::DQ_PR_weak)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak);
if (Attributes & ObjCDeclSpec::DQ_PR_copy)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_copy);
if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
if (isAssign)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_assign);
// In the semantic attributes, one of nonatomic or atomic is always set.
if (Attributes & ObjCDeclSpec::DQ_PR_nonatomic)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nonatomic);
else
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_atomic);
// 'unsafe_unretained' is an alias for 'assign'.
if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_assign);
if (isAssign)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
if (MethodImplKind == tok::objc_required)
PDecl->setPropertyImplementation(ObjCPropertyDecl::Required);
else if (MethodImplKind == tok::objc_optional)
PDecl->setPropertyImplementation(ObjCPropertyDecl::Optional);
if (Attributes & ObjCDeclSpec::DQ_PR_nullability)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_nullability);
if (Attributes & ObjCDeclSpec::DQ_PR_null_resettable)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_null_resettable);
if (Attributes & ObjCDeclSpec::DQ_PR_class)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_class);
return PDecl;
}
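// Under ARC the backing ivar's lifetime must agree with the property's
// implied ownership; e.g. (hypothetical):
//   @property (strong) id obj;
//   @synthesize obj = _obj;   // error if the ivar was declared __weak id _obj;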
static void checkARCPropertyImpl(Sema &S, SourceLocation propertyImplLoc,
ObjCPropertyDecl *property,
ObjCIvarDecl *ivar) {
if (property->isInvalidDecl() || ivar->isInvalidDecl()) return;
QualType ivarType = ivar->getType();
Qualifiers::ObjCLifetime ivarLifetime = ivarType.getObjCLifetime();
// The lifetime implied by the property's attributes.
Qualifiers::ObjCLifetime propertyLifetime =
getImpliedARCOwnership(property->getPropertyAttributes(),
property->getType());
// We're fine if they match.
if (propertyLifetime == ivarLifetime) return;
// None isn't a valid lifetime for an object ivar in ARC, and
// __autoreleasing is never valid; don't diagnose twice.
if ((ivarLifetime == Qualifiers::OCL_None &&
S.getLangOpts().ObjCAutoRefCount) ||
ivarLifetime == Qualifiers::OCL_Autoreleasing)
return;
// If the ivar is private, and it's implicitly __unsafe_unretained
// because of its type, then pretend it was actually implicitly
// __strong. This is only sound because we're processing the
// property implementation before parsing any method bodies.
if (ivarLifetime == Qualifiers::OCL_ExplicitNone &&
propertyLifetime == Qualifiers::OCL_Strong &&
ivar->getAccessControl() == ObjCIvarDecl::Private) {
SplitQualType split = ivarType.split();
if (split.Quals.hasObjCLifetime()) {
assert(ivarType->isObjCARCImplicitlyUnretainedType());
split.Quals.setObjCLifetime(Qualifiers::OCL_Strong);
ivarType = S.Context.getQualifiedType(split);
ivar->setType(ivarType);
return;
}
}
switch (propertyLifetime) {
case Qualifiers::OCL_Strong:
S.Diag(ivar->getLocation(), diag::err_arc_strong_property_ownership)
<< property->getDeclName()
<< ivar->getDeclName()
<< ivarLifetime;
break;
case Qualifiers::OCL_Weak:
S.Diag(ivar->getLocation(), diag::err_weak_property)
<< property->getDeclName()
<< ivar->getDeclName();
break;
case Qualifiers::OCL_ExplicitNone:
S.Diag(ivar->getLocation(), diag::err_arc_assign_property_ownership)
<< property->getDeclName()
<< ivar->getDeclName()
<< ((property->getPropertyAttributesAsWritten()
& ObjCPropertyDecl::OBJC_PR_assign) != 0);
break;
case Qualifiers::OCL_Autoreleasing:
llvm_unreachable("properties cannot be autoreleasing");
case Qualifiers::OCL_None:
// Any other property should be ignored.
return;
}
S.Diag(property->getLocation(), diag::note_property_declare);
if (propertyImplLoc.isValid())
S.Diag(propertyImplLoc, diag::note_property_synthesize);
}
/// setImpliedPropertyAttributeForReadOnlyProperty -
/// This routine evaluates the lifetime attributes for a 'readonly'
/// property with no known lifetime of its own, using the backing
/// ivar's attributes, if any. If there is no backing ivar, the
/// property's lifetime is assumed to be 'strong'.
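// e.g. (hypothetical): for
//   @property (readonly) id x;   // no ownership written
// backed by '__weak id _x;', the property is treated as 'weak'; with no
// backing ivar it is treated as 'strong'.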
static void setImpliedPropertyAttributeForReadOnlyProperty(
ObjCPropertyDecl *property, ObjCIvarDecl *ivar) {
Qualifiers::ObjCLifetime propertyLifetime =
getImpliedARCOwnership(property->getPropertyAttributes(),
property->getType());
if (propertyLifetime != Qualifiers::OCL_None)
return;
if (!ivar) {
// if no backing ivar, make property 'strong'.
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
return;
}
// The property assumes the ownership of its backing ivar.
QualType ivarType = ivar->getType();
Qualifiers::ObjCLifetime ivarLifetime = ivarType.getObjCLifetime();
if (ivarLifetime == Qualifiers::OCL_Strong)
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
else if (ivarLifetime == Qualifiers::OCL_Weak)
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak);
}
static bool
isIncompatiblePropertyAttribute(unsigned Attr1, unsigned Attr2,
ObjCPropertyDecl::PropertyAttributeKind Kind) {
return (Attr1 & Kind) != (Attr2 & Kind);
}
static bool areIncompatiblePropertyAttributes(unsigned Attr1, unsigned Attr2,
unsigned Kinds) {
return ((Attr1 & Kinds) != 0) != ((Attr2 & Kinds) != 0);
}
/// SelectPropertyForSynthesisFromProtocols - Finds the most appropriate
/// property declaration that should be synthesised in all of the inherited
/// protocols. It also diagnoses properties declared in inherited protocols with
/// mismatched types or attributes, since any of them can be candidate for
/// synthesis.
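// e.g. (hypothetical): if protocol P1 declares
//   @property (readonly) id x;
// and protocol P2 declares
//   @property (readwrite) id x;
// the readwrite declaration is preferred for synthesis, and other
// mismatched redeclarations are diagnosed against it.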
static ObjCPropertyDecl *
SelectPropertyForSynthesisFromProtocols(Sema &S, SourceLocation AtLoc,
ObjCInterfaceDecl *ClassDecl,
ObjCPropertyDecl *Property) {
assert(isa<ObjCProtocolDecl>(Property->getDeclContext()) &&
"Expected a property from a protocol");
ObjCInterfaceDecl::ProtocolPropertySet ProtocolSet;
ObjCInterfaceDecl::PropertyDeclOrder Properties;
for (const auto *PI : ClassDecl->all_referenced_protocols()) {
if (const ObjCProtocolDecl *PDecl = PI->getDefinition())
PDecl->collectInheritedProtocolProperties(Property, ProtocolSet,
Properties);
}
if (ObjCInterfaceDecl *SDecl = ClassDecl->getSuperClass()) {
while (SDecl) {
for (const auto *PI : SDecl->all_referenced_protocols()) {
if (const ObjCProtocolDecl *PDecl = PI->getDefinition())
PDecl->collectInheritedProtocolProperties(Property, ProtocolSet,
Properties);
}
SDecl = SDecl->getSuperClass();
}
}
if (Properties.empty())
return Property;
ObjCPropertyDecl *OriginalProperty = Property;
size_t SelectedIndex = 0;
for (const auto &Prop : llvm::enumerate(Properties)) {
// Select the 'readwrite' property if such property exists.
if (Property->isReadOnly() && !Prop.value()->isReadOnly()) {
Property = Prop.value();
SelectedIndex = Prop.index();
}
}
if (Property != OriginalProperty) {
// Check that the old property is compatible with the new one.
Properties[SelectedIndex] = OriginalProperty;
}
QualType RHSType = S.Context.getCanonicalType(Property->getType());
- unsigned OriginalAttributes = Property->getPropertyAttributes();
+ unsigned OriginalAttributes = Property->getPropertyAttributesAsWritten();
enum MismatchKind {
IncompatibleType = 0,
HasNoExpectedAttribute,
HasUnexpectedAttribute,
DifferentGetter,
DifferentSetter
};
// Represents a property from another protocol that conflicts with the
// selected declaration.
struct MismatchingProperty {
const ObjCPropertyDecl *Prop;
MismatchKind Kind;
StringRef AttributeName;
};
SmallVector<MismatchingProperty, 4> Mismatches;
for (ObjCPropertyDecl *Prop : Properties) {
// Verify the property attributes.
- unsigned Attr = Prop->getPropertyAttributes();
+ unsigned Attr = Prop->getPropertyAttributesAsWritten();
if (Attr != OriginalAttributes) {
auto Diag = [&](bool OriginalHasAttribute, StringRef AttributeName) {
MismatchKind Kind = OriginalHasAttribute ? HasNoExpectedAttribute
: HasUnexpectedAttribute;
Mismatches.push_back({Prop, Kind, AttributeName});
};
if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr,
ObjCPropertyDecl::OBJC_PR_copy)) {
Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_copy, "copy");
continue;
}
if (areIncompatiblePropertyAttributes(
OriginalAttributes, Attr, ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_strong)) {
Diag(OriginalAttributes & (ObjCPropertyDecl::OBJC_PR_retain |
ObjCPropertyDecl::OBJC_PR_strong),
"retain (or strong)");
continue;
}
if (isIncompatiblePropertyAttribute(OriginalAttributes, Attr,
ObjCPropertyDecl::OBJC_PR_atomic)) {
Diag(OriginalAttributes & ObjCPropertyDecl::OBJC_PR_atomic, "atomic");
continue;
}
}
if (Property->getGetterName() != Prop->getGetterName()) {
Mismatches.push_back({Prop, DifferentGetter, ""});
continue;
}
if (!Property->isReadOnly() && !Prop->isReadOnly() &&
Property->getSetterName() != Prop->getSetterName()) {
Mismatches.push_back({Prop, DifferentSetter, ""});
continue;
}
QualType LHSType = S.Context.getCanonicalType(Prop->getType());
if (!S.Context.propertyTypesAreCompatible(LHSType, RHSType)) {
bool IncompatibleObjC = false;
QualType ConvertedType;
if (!S.isObjCPointerConversion(RHSType, LHSType, ConvertedType, IncompatibleObjC)
|| IncompatibleObjC) {
Mismatches.push_back({Prop, IncompatibleType, ""});
continue;
}
}
}
if (Mismatches.empty())
return Property;
// Diagnose the incompatibility.
{
bool HasIncompatibleAttributes = false;
for (const auto &Note : Mismatches)
HasIncompatibleAttributes =
Note.Kind != IncompatibleType ? true : HasIncompatibleAttributes;
// Promote the warning to an error if there are incompatible attributes or
// incompatible types together with readwrite/readonly incompatibility.
auto Diag = S.Diag(Property->getLocation(),
Property != OriginalProperty || HasIncompatibleAttributes
? diag::err_protocol_property_mismatch
: diag::warn_protocol_property_mismatch);
Diag << Mismatches[0].Kind;
switch (Mismatches[0].Kind) {
case IncompatibleType:
Diag << Property->getType();
break;
case HasNoExpectedAttribute:
case HasUnexpectedAttribute:
Diag << Mismatches[0].AttributeName;
break;
case DifferentGetter:
Diag << Property->getGetterName();
break;
case DifferentSetter:
Diag << Property->getSetterName();
break;
}
}
for (const auto &Note : Mismatches) {
auto Diag =
S.Diag(Note.Prop->getLocation(), diag::note_protocol_property_declare)
<< Note.Kind;
switch (Note.Kind) {
case IncompatibleType:
Diag << Note.Prop->getType();
break;
case HasNoExpectedAttribute:
case HasUnexpectedAttribute:
Diag << Note.AttributeName;
break;
case DifferentGetter:
Diag << Note.Prop->getGetterName();
break;
case DifferentSetter:
Diag << Note.Prop->getSetterName();
break;
}
}
if (AtLoc.isValid())
S.Diag(AtLoc, diag::note_property_synthesize);
return Property;
}
/// Determine whether any storage attributes were written on the property.
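// e.g. (hypothetical): 'strong' below is a written storage attribute,
// whether it appears on the primary declaration or on a readwrite
// redeclaration in a class extension:
//   @property (strong, readonly) id x;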
static bool hasWrittenStorageAttribute(ObjCPropertyDecl *Prop,
ObjCPropertyQueryKind QueryKind) {
if (Prop->getPropertyAttributesAsWritten() & OwnershipMask) return true;
// If this is a readwrite property in a class extension that refines
// a readonly property in the original class definition, check it as
// well.
// If it's a readonly property, we're not interested.
if (Prop->isReadOnly()) return false;
// Is it declared in an extension?
auto Category = dyn_cast<ObjCCategoryDecl>(Prop->getDeclContext());
if (!Category || !Category->IsClassExtension()) return false;
// Find the corresponding property in the primary class definition.
auto OrigClass = Category->getClassInterface();
for (auto Found : OrigClass->lookup(Prop->getDeclName())) {
if (ObjCPropertyDecl *OrigProp = dyn_cast<ObjCPropertyDecl>(Found))
return OrigProp->getPropertyAttributesAsWritten() & OwnershipMask;
}
// Look through all of the protocols.
for (const auto *Proto : OrigClass->all_referenced_protocols()) {
if (ObjCPropertyDecl *OrigProp = Proto->FindPropertyDeclaration(
Prop->getIdentifier(), QueryKind))
return OrigProp->getPropertyAttributesAsWritten() & OwnershipMask;
}
return false;
}
/// ActOnPropertyImplDecl - This routine performs semantic checks and
/// builds the AST node for a property implementation declaration; declared
/// as \@synthesize or \@dynamic.
///
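// Illustrative (hypothetical) declarations handled here:
//   @implementation Foo
//   @synthesize name = _name;   // back property 'name' with ivar '_name'
//   @dynamic title;             // accessors are provided elsewhere
//   @end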
Decl *Sema::ActOnPropertyImplDecl(Scope *S,
SourceLocation AtLoc,
SourceLocation PropertyLoc,
bool Synthesize,
IdentifierInfo *PropertyId,
IdentifierInfo *PropertyIvar,
SourceLocation PropertyIvarLoc,
ObjCPropertyQueryKind QueryKind) {
ObjCContainerDecl *ClassImpDecl =
dyn_cast<ObjCContainerDecl>(CurContext);
// Make sure we have a context for the property implementation declaration.
if (!ClassImpDecl) {
Diag(AtLoc, diag::err_missing_property_context);
return nullptr;
}
if (PropertyIvarLoc.isInvalid())
PropertyIvarLoc = PropertyLoc;
SourceLocation PropertyDiagLoc = PropertyLoc;
if (PropertyDiagLoc.isInvalid())
PropertyDiagLoc = ClassImpDecl->getLocStart();
ObjCPropertyDecl *property = nullptr;
ObjCInterfaceDecl *IDecl = nullptr;
// Find the class or category class where this property must have
// a declaration.
ObjCImplementationDecl *IC = nullptr;
ObjCCategoryImplDecl *CatImplClass = nullptr;
if ((IC = dyn_cast<ObjCImplementationDecl>(ClassImpDecl))) {
IDecl = IC->getClassInterface();
// We always synthesize an interface for an implementation
// without an interface decl. So, IDecl is always non-zero.
assert(IDecl &&
"ActOnPropertyImplDecl - @implementation without @interface");
// Look for this property declaration in the @implementation's @interface
property = IDecl->FindPropertyDeclaration(PropertyId, QueryKind);
if (!property) {
Diag(PropertyLoc, diag::err_bad_property_decl) << IDecl->getDeclName();
return nullptr;
}
if (property->isClassProperty() && Synthesize) {
Diag(PropertyLoc, diag::err_synthesize_on_class_property) << PropertyId;
return nullptr;
}
unsigned PIkind = property->getPropertyAttributesAsWritten();
if ((PIkind & (ObjCPropertyDecl::OBJC_PR_atomic |
ObjCPropertyDecl::OBJC_PR_nonatomic) ) == 0) {
if (AtLoc.isValid())
Diag(AtLoc, diag::warn_implicit_atomic_property);
else
Diag(IC->getLocation(), diag::warn_auto_implicit_atomic_property);
Diag(property->getLocation(), diag::note_property_declare);
}
if (const ObjCCategoryDecl *CD =
dyn_cast<ObjCCategoryDecl>(property->getDeclContext())) {
if (!CD->IsClassExtension()) {
Diag(PropertyLoc, diag::err_category_property) << CD->getDeclName();
Diag(property->getLocation(), diag::note_property_declare);
return nullptr;
}
}
if (Synthesize&&
(PIkind & ObjCPropertyDecl::OBJC_PR_readonly) &&
property->hasAttr<IBOutletAttr>() &&
!AtLoc.isValid()) {
bool ReadWriteProperty = false;
// Search the class extensions to see if the 'readonly' property is
// redeclared 'readwrite'; if so, no warning is issued.
for (auto *Ext : IDecl->known_extensions()) {
DeclContext::lookup_result R = Ext->lookup(property->getDeclName());
if (!R.empty())
if (ObjCPropertyDecl *ExtProp = dyn_cast<ObjCPropertyDecl>(R[0])) {
PIkind = ExtProp->getPropertyAttributesAsWritten();
if (PIkind & ObjCPropertyDecl::OBJC_PR_readwrite) {
ReadWriteProperty = true;
break;
}
}
}
if (!ReadWriteProperty) {
Diag(property->getLocation(), diag::warn_auto_readonly_iboutlet_property)
<< property;
SourceLocation readonlyLoc;
if (LocPropertyAttribute(Context, "readonly",
property->getLParenLoc(), readonlyLoc)) {
SourceLocation endLoc =
readonlyLoc.getLocWithOffset(strlen("readonly")-1);
SourceRange ReadonlySourceRange(readonlyLoc, endLoc);
Diag(property->getLocation(),
diag::note_auto_readonly_iboutlet_fixup_suggest) <<
FixItHint::CreateReplacement(ReadonlySourceRange, "readwrite");
}
}
}
if (Synthesize && isa<ObjCProtocolDecl>(property->getDeclContext()))
property = SelectPropertyForSynthesisFromProtocols(*this, AtLoc, IDecl,
property);
} else if ((CatImplClass = dyn_cast<ObjCCategoryImplDecl>(ClassImpDecl))) {
if (Synthesize) {
Diag(AtLoc, diag::err_synthesize_category_decl);
return nullptr;
}
IDecl = CatImplClass->getClassInterface();
if (!IDecl) {
Diag(AtLoc, diag::err_missing_property_interface);
return nullptr;
}
ObjCCategoryDecl *Category =
IDecl->FindCategoryDeclaration(CatImplClass->getIdentifier());
// If the category for this implementation was not found, it is an error
// which has already been reported earlier.
if (!Category)
return nullptr;
// Look for this property declaration in @implementation's category
property = Category->FindPropertyDeclaration(PropertyId, QueryKind);
if (!property) {
Diag(PropertyLoc, diag::err_bad_category_property_decl)
<< Category->getDeclName();
return nullptr;
}
} else {
Diag(AtLoc, diag::err_bad_property_context);
return nullptr;
}
ObjCIvarDecl *Ivar = nullptr;
bool CompleteTypeErr = false;
bool compat = true;
// Check that we have a valid, previously declared ivar for @synthesize
if (Synthesize) {
// @synthesize
if (!PropertyIvar)
PropertyIvar = PropertyId;
// Check that this is a previously declared 'ivar' in 'IDecl' interface
ObjCInterfaceDecl *ClassDeclared;
Ivar = IDecl->lookupInstanceVariable(PropertyIvar, ClassDeclared);
QualType PropType = property->getType();
QualType PropertyIvarType = PropType.getNonReferenceType();
if (RequireCompleteType(PropertyDiagLoc, PropertyIvarType,
diag::err_incomplete_synthesized_property,
property->getDeclName())) {
Diag(property->getLocation(), diag::note_property_declare);
CompleteTypeErr = true;
}
if (getLangOpts().ObjCAutoRefCount &&
(property->getPropertyAttributesAsWritten() &
ObjCPropertyDecl::OBJC_PR_readonly) &&
PropertyIvarType->isObjCRetainableType()) {
setImpliedPropertyAttributeForReadOnlyProperty(property, Ivar);
}
ObjCPropertyDecl::PropertyAttributeKind kind
= property->getPropertyAttributes();
bool isARCWeak = false;
if (kind & ObjCPropertyDecl::OBJC_PR_weak) {
// Add GC __weak to the ivar type if the property is weak.
if (getLangOpts().getGC() != LangOptions::NonGC) {
assert(!getLangOpts().ObjCAutoRefCount);
if (PropertyIvarType.isObjCGCStrong()) {
Diag(PropertyDiagLoc, diag::err_gc_weak_property_strong_type);
Diag(property->getLocation(), diag::note_property_declare);
} else {
PropertyIvarType =
Context.getObjCGCQualType(PropertyIvarType, Qualifiers::Weak);
}
// Otherwise, check whether ARC __weak is enabled and works with
// the property type.
} else {
if (!getLangOpts().ObjCWeak) {
// Only complain here when synthesizing an ivar.
if (!Ivar) {
Diag(PropertyDiagLoc,
getLangOpts().ObjCWeakRuntime
? diag::err_synthesizing_arc_weak_property_disabled
: diag::err_synthesizing_arc_weak_property_no_runtime);
Diag(property->getLocation(), diag::note_property_declare);
}
CompleteTypeErr = true; // suppress later diagnostics about the ivar
} else {
isARCWeak = true;
if (const ObjCObjectPointerType *ObjT =
PropertyIvarType->getAs<ObjCObjectPointerType>()) {
const ObjCInterfaceDecl *ObjI = ObjT->getInterfaceDecl();
if (ObjI && ObjI->isArcWeakrefUnavailable()) {
Diag(property->getLocation(),
diag::err_arc_weak_unavailable_property)
<< PropertyIvarType;
Diag(ClassImpDecl->getLocation(), diag::note_implemented_by_class)
<< ClassImpDecl->getName();
}
}
}
}
}
if (AtLoc.isInvalid()) {
// When default-synthesizing a property, check whether there is an ivar
// matching the property name and issue a warning, since this is the most
// common case of an ivar that was meant to back the property (as it would
// in the non-default synthesis case) going unused.
ObjCInterfaceDecl *ClassDeclared=nullptr;
ObjCIvarDecl *originalIvar =
IDecl->lookupInstanceVariable(property->getIdentifier(),
ClassDeclared);
if (originalIvar) {
Diag(PropertyDiagLoc,
diag::warn_autosynthesis_property_ivar_match)
<< PropertyId << (Ivar == nullptr) << PropertyIvar
<< originalIvar->getIdentifier();
Diag(property->getLocation(), diag::note_property_declare);
Diag(originalIvar->getLocation(), diag::note_ivar_decl);
}
}
if (!Ivar) {
// In ARC, give the ivar a lifetime qualifier based on the
// property attributes.
if ((getLangOpts().ObjCAutoRefCount || isARCWeak) &&
!PropertyIvarType.getObjCLifetime() &&
PropertyIvarType->isObjCRetainableType()) {
// It's an error if we have to do this and the user didn't
// explicitly write an ownership attribute on the property.
if (!hasWrittenStorageAttribute(property, QueryKind) &&
!(kind & ObjCPropertyDecl::OBJC_PR_strong)) {
Diag(PropertyDiagLoc,
diag::err_arc_objc_property_default_assign_on_object);
Diag(property->getLocation(), diag::note_property_declare);
} else {
Qualifiers::ObjCLifetime lifetime =
getImpliedARCOwnership(kind, PropertyIvarType);
assert(lifetime && "no lifetime for property?");
Qualifiers qs;
qs.addObjCLifetime(lifetime);
PropertyIvarType = Context.getQualifiedType(PropertyIvarType, qs);
}
}
Ivar = ObjCIvarDecl::Create(Context, ClassImpDecl,
PropertyIvarLoc,PropertyIvarLoc, PropertyIvar,
PropertyIvarType, /*Dinfo=*/nullptr,
ObjCIvarDecl::Private,
(Expr *)nullptr, true);
if (RequireNonAbstractType(PropertyIvarLoc,
PropertyIvarType,
diag::err_abstract_type_in_decl,
AbstractSynthesizedIvarType)) {
Diag(property->getLocation(), diag::note_property_declare);
// An abstract type is as bad as an incomplete type.
CompleteTypeErr = true;
}
if (CompleteTypeErr)
Ivar->setInvalidDecl();
ClassImpDecl->addDecl(Ivar);
IDecl->makeDeclVisibleInContext(Ivar);
if (getLangOpts().ObjCRuntime.isFragile())
Diag(PropertyDiagLoc, diag::err_missing_property_ivar_decl)
<< PropertyId;
// Note! I deliberately want it to fall through so that we have a
// property implementation and to avoid future warnings.
} else if (getLangOpts().ObjCRuntime.isNonFragile() &&
!declaresSameEntity(ClassDeclared, IDecl)) {
Diag(PropertyDiagLoc, diag::err_ivar_in_superclass_use)
<< property->getDeclName() << Ivar->getDeclName()
<< ClassDeclared->getDeclName();
Diag(Ivar->getLocation(), diag::note_previous_access_declaration)
<< Ivar << Ivar->getName();
// Note! I deliberately want it to fall through so more errors are caught.
}
property->setPropertyIvarDecl(Ivar);
QualType IvarType = Context.getCanonicalType(Ivar->getType());
// Check that type of property and its ivar are type compatible.
if (!Context.hasSameType(PropertyIvarType, IvarType)) {
if (isa<ObjCObjectPointerType>(PropertyIvarType)
&& isa<ObjCObjectPointerType>(IvarType))
compat =
Context.canAssignObjCInterfaces(
PropertyIvarType->getAs<ObjCObjectPointerType>(),
IvarType->getAs<ObjCObjectPointerType>());
else {
compat = (CheckAssignmentConstraints(PropertyIvarLoc, PropertyIvarType,
IvarType)
== Compatible);
}
if (!compat) {
Diag(PropertyDiagLoc, diag::err_property_ivar_type)
<< property->getDeclName() << PropType
<< Ivar->getDeclName() << IvarType;
Diag(Ivar->getLocation(), diag::note_ivar_decl);
// Note! I deliberately want it to fall through so that we have a
// property implementation and to avoid future warnings.
}
else {
// FIXME! Rules for properties are somewhat different from those
// for assignments. Use a new routine to consolidate all cases;
// specifically for property redeclarations as well as for ivars.
QualType lhsType =Context.getCanonicalType(PropertyIvarType).getUnqualifiedType();
QualType rhsType =Context.getCanonicalType(IvarType).getUnqualifiedType();
if (lhsType != rhsType &&
lhsType->isArithmeticType()) {
Diag(PropertyDiagLoc, diag::err_property_ivar_type)
<< property->getDeclName() << PropType
<< Ivar->getDeclName() << IvarType;
Diag(Ivar->getLocation(), diag::note_ivar_decl);
// Fall thru - see previous comment
}
}
// __weak is explicit, so checking the canonical type works.
if ((PropType.isObjCGCWeak() && !IvarType.isObjCGCWeak() &&
getLangOpts().getGC() != LangOptions::NonGC)) {
Diag(PropertyDiagLoc, diag::err_weak_property)
<< property->getDeclName() << Ivar->getDeclName();
Diag(Ivar->getLocation(), diag::note_ivar_decl);
// Fall thru - see previous comment
}
// Fall thru - see previous comment
if ((property->getType()->isObjCObjectPointerType() ||
PropType.isObjCGCStrong()) && IvarType.isObjCGCWeak() &&
getLangOpts().getGC() != LangOptions::NonGC) {
Diag(PropertyDiagLoc, diag::err_strong_property)
<< property->getDeclName() << Ivar->getDeclName();
// Fall thru - see previous comment
}
}
if (getLangOpts().ObjCAutoRefCount || isARCWeak ||
Ivar->getType().getObjCLifetime())
checkARCPropertyImpl(*this, PropertyLoc, property, Ivar);
} else if (PropertyIvar)
// @dynamic
Diag(PropertyDiagLoc, diag::err_dynamic_property_ivar_decl);
assert (property && "ActOnPropertyImplDecl - property declaration missing");
ObjCPropertyImplDecl *PIDecl =
ObjCPropertyImplDecl::Create(Context, CurContext, AtLoc, PropertyLoc,
property,
(Synthesize ?
ObjCPropertyImplDecl::Synthesize
: ObjCPropertyImplDecl::Dynamic),
Ivar, PropertyIvarLoc);
if (CompleteTypeErr || !compat)
PIDecl->setInvalidDecl();
if (ObjCMethodDecl *getterMethod = property->getGetterMethodDecl()) {
getterMethod->createImplicitParams(Context, IDecl);
if (getLangOpts().CPlusPlus && Synthesize && !CompleteTypeErr &&
Ivar->getType()->isRecordType()) {
// For Objective-C++, we need to synthesize the AST for the ivar object to be
// returned by the getter, as it must conform to C++'s copy-return rules.
// FIXME. Eventually we want to do this for Objective-C as well.
SynthesizedFunctionScope Scope(*this, getterMethod);
ImplicitParamDecl *SelfDecl = getterMethod->getSelfDecl();
DeclRefExpr *SelfExpr =
new (Context) DeclRefExpr(SelfDecl, false, SelfDecl->getType(),
VK_LValue, PropertyDiagLoc);
MarkDeclRefReferenced(SelfExpr);
Expr *LoadSelfExpr =
ImplicitCastExpr::Create(Context, SelfDecl->getType(),
CK_LValueToRValue, SelfExpr, nullptr,
VK_RValue);
Expr *IvarRefExpr =
new (Context) ObjCIvarRefExpr(Ivar,
Ivar->getUsageType(SelfDecl->getType()),
PropertyDiagLoc,
Ivar->getLocation(),
LoadSelfExpr, true, true);
ExprResult Res = PerformCopyInitialization(
InitializedEntity::InitializeResult(PropertyDiagLoc,
getterMethod->getReturnType(),
/*NRVO=*/false),
PropertyDiagLoc, IvarRefExpr);
if (!Res.isInvalid()) {
Expr *ResExpr = Res.getAs<Expr>();
if (ResExpr)
ResExpr = MaybeCreateExprWithCleanups(ResExpr);
PIDecl->setGetterCXXConstructor(ResExpr);
}
}
if (property->hasAttr<NSReturnsNotRetainedAttr>() &&
!getterMethod->hasAttr<NSReturnsNotRetainedAttr>()) {
Diag(getterMethod->getLocation(),
diag::warn_property_getter_owning_mismatch);
Diag(property->getLocation(), diag::note_property_declare);
}
if (getLangOpts().ObjCAutoRefCount && Synthesize)
switch (getterMethod->getMethodFamily()) {
case OMF_retain:
case OMF_retainCount:
case OMF_release:
case OMF_autorelease:
Diag(getterMethod->getLocation(), diag::err_arc_illegal_method_def)
<< 1 << getterMethod->getSelector();
break;
default:
break;
}
}
if (ObjCMethodDecl *setterMethod = property->getSetterMethodDecl()) {
setterMethod->createImplicitParams(Context, IDecl);
if (getLangOpts().CPlusPlus && Synthesize && !CompleteTypeErr &&
Ivar->getType()->isRecordType()) {
// FIXME. Eventually we want to do this for Objective-C as well.
SynthesizedFunctionScope Scope(*this, setterMethod);
ImplicitParamDecl *SelfDecl = setterMethod->getSelfDecl();
DeclRefExpr *SelfExpr =
new (Context) DeclRefExpr(SelfDecl, false, SelfDecl->getType(),
VK_LValue, PropertyDiagLoc);
MarkDeclRefReferenced(SelfExpr);
Expr *LoadSelfExpr =
ImplicitCastExpr::Create(Context, SelfDecl->getType(),
CK_LValueToRValue, SelfExpr, nullptr,
VK_RValue);
Expr *lhs =
new (Context) ObjCIvarRefExpr(Ivar,
Ivar->getUsageType(SelfDecl->getType()),
PropertyDiagLoc,
Ivar->getLocation(),
LoadSelfExpr, true, true);
ObjCMethodDecl::param_iterator P = setterMethod->param_begin();
ParmVarDecl *Param = (*P);
QualType T = Param->getType().getNonReferenceType();
DeclRefExpr *rhs = new (Context) DeclRefExpr(Param, false, T,
VK_LValue, PropertyDiagLoc);
MarkDeclRefReferenced(rhs);
ExprResult Res = BuildBinOp(S, PropertyDiagLoc,
BO_Assign, lhs, rhs);
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_atomic) {
Expr *callExpr = Res.getAs<Expr>();
if (const CXXOperatorCallExpr *CXXCE =
dyn_cast_or_null<CXXOperatorCallExpr>(callExpr))
if (const FunctionDecl *FuncDecl = CXXCE->getDirectCallee())
if (!FuncDecl->isTrivial())
if (property->getType()->isReferenceType()) {
Diag(PropertyDiagLoc,
diag::err_atomic_property_nontrivial_assign_op)
<< property->getType();
Diag(FuncDecl->getLocStart(),
diag::note_callee_decl) << FuncDecl;
}
}
PIDecl->setSetterCXXAssignment(Res.getAs<Expr>());
}
}
if (IC) {
if (Synthesize)
if (ObjCPropertyImplDecl *PPIDecl =
IC->FindPropertyImplIvarDecl(PropertyIvar)) {
Diag(PropertyLoc, diag::err_duplicate_ivar_use)
<< PropertyId << PPIDecl->getPropertyDecl()->getIdentifier()
<< PropertyIvar;
Diag(PPIDecl->getLocation(), diag::note_previous_use);
}
if (ObjCPropertyImplDecl *PPIDecl
= IC->FindPropertyImplDecl(PropertyId, QueryKind)) {
Diag(PropertyLoc, diag::err_property_implemented) << PropertyId;
Diag(PPIDecl->getLocation(), diag::note_previous_declaration);
return nullptr;
}
IC->addPropertyImplementation(PIDecl);
if (getLangOpts().ObjCDefaultSynthProperties &&
getLangOpts().ObjCRuntime.isNonFragile() &&
!IDecl->isObjCRequiresPropertyDefs()) {
// Diagnose if an ivar was lazily synthesized due to a previous
// use and if 1) the property is @dynamic or 2) the property is
// synthesized but requires an ivar of a different name.
ObjCInterfaceDecl *ClassDeclared = nullptr;
ObjCIvarDecl *Ivar = nullptr;
if (!Synthesize)
Ivar = IDecl->lookupInstanceVariable(PropertyId, ClassDeclared);
else {
if (PropertyIvar && PropertyIvar != PropertyId)
Ivar = IDecl->lookupInstanceVariable(PropertyId, ClassDeclared);
}
// Issue diagnostics only if Ivar belongs to current class.
if (Ivar && Ivar->getSynthesize() &&
declaresSameEntity(IC->getClassInterface(), ClassDeclared)) {
Diag(Ivar->getLocation(), diag::err_undeclared_var_use)
<< PropertyId;
Ivar->setInvalidDecl();
}
}
} else {
if (Synthesize)
if (ObjCPropertyImplDecl *PPIDecl =
CatImplClass->FindPropertyImplIvarDecl(PropertyIvar)) {
Diag(PropertyDiagLoc, diag::err_duplicate_ivar_use)
<< PropertyId << PPIDecl->getPropertyDecl()->getIdentifier()
<< PropertyIvar;
Diag(PPIDecl->getLocation(), diag::note_previous_use);
}
if (ObjCPropertyImplDecl *PPIDecl =
CatImplClass->FindPropertyImplDecl(PropertyId, QueryKind)) {
Diag(PropertyDiagLoc, diag::err_property_implemented) << PropertyId;
Diag(PPIDecl->getLocation(), diag::note_previous_declaration);
return nullptr;
}
CatImplClass->addPropertyImplementation(PIDecl);
}
return PIDecl;
}
//===----------------------------------------------------------------------===//
// Helper methods.
//===----------------------------------------------------------------------===//
/// DiagnosePropertyMismatch - Compares two properties for their
/// attributes and types and warns on a variety of inconsistencies.
///
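/// An illustrative sketch (hypothetical declarations, not from this file):
///
///   @interface Base
///   @property (copy) NSString *title;
///   @end
///
///   @interface Sub : Base
///   @property (retain) NSString *title; // warns: 'copy' attribute of
///                                       // 'title' does not match the
///                                       // property inherited from 'Base'
///   @end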
void
Sema::DiagnosePropertyMismatch(ObjCPropertyDecl *Property,
ObjCPropertyDecl *SuperProperty,
const IdentifierInfo *inheritedName,
bool OverridingProtocolProperty) {
ObjCPropertyDecl::PropertyAttributeKind CAttr =
Property->getPropertyAttributes();
ObjCPropertyDecl::PropertyAttributeKind SAttr =
SuperProperty->getPropertyAttributes();
// We allow readonly properties without an explicit ownership
// (assign/unsafe_unretained/weak/retain/strong/copy) in super class
// to be overridden by a property with any explicit ownership in the subclass.
if (!OverridingProtocolProperty &&
!getOwnershipRule(SAttr) && getOwnershipRule(CAttr))
;
else {
if ((CAttr & ObjCPropertyDecl::OBJC_PR_readonly)
&& (SAttr & ObjCPropertyDecl::OBJC_PR_readwrite))
Diag(Property->getLocation(), diag::warn_readonly_property)
<< Property->getDeclName() << inheritedName;
if ((CAttr & ObjCPropertyDecl::OBJC_PR_copy)
!= (SAttr & ObjCPropertyDecl::OBJC_PR_copy))
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "copy" << inheritedName;
else if (!(SAttr & ObjCPropertyDecl::OBJC_PR_readonly)){
unsigned CAttrRetain =
(CAttr &
(ObjCPropertyDecl::OBJC_PR_retain | ObjCPropertyDecl::OBJC_PR_strong));
unsigned SAttrRetain =
(SAttr &
(ObjCPropertyDecl::OBJC_PR_retain | ObjCPropertyDecl::OBJC_PR_strong));
bool CStrong = (CAttrRetain != 0);
bool SStrong = (SAttrRetain != 0);
if (CStrong != SStrong)
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "retain (or strong)" << inheritedName;
}
}
// Check for nonatomic; note that nonatomic is effectively
// meaningless for readonly properties, so don't diagnose if the
// atomic property is 'readonly'.
checkAtomicPropertyMismatch(*this, SuperProperty, Property, false);
if (Property->getSetterName() != SuperProperty->getSetterName()) {
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "setter" << inheritedName;
Diag(SuperProperty->getLocation(), diag::note_property_declare);
}
if (Property->getGetterName() != SuperProperty->getGetterName()) {
Diag(Property->getLocation(), diag::warn_property_attribute)
<< Property->getDeclName() << "getter" << inheritedName;
Diag(SuperProperty->getLocation(), diag::note_property_declare);
}
QualType LHSType =
Context.getCanonicalType(SuperProperty->getType());
QualType RHSType =
Context.getCanonicalType(Property->getType());
if (!Context.propertyTypesAreCompatible(LHSType, RHSType)) {
// Handle cases not covered above.
// FIXME. For future support of covariant property types, revisit this.
bool IncompatibleObjC = false;
QualType ConvertedType;
if (!isObjCPointerConversion(RHSType, LHSType,
ConvertedType, IncompatibleObjC) ||
IncompatibleObjC) {
Diag(Property->getLocation(), diag::warn_property_types_are_incompatible)
<< Property->getType() << SuperProperty->getType() << inheritedName;
Diag(SuperProperty->getLocation(), diag::note_property_declare);
}
}
}
bool Sema::DiagnosePropertyAccessorMismatch(ObjCPropertyDecl *property,
ObjCMethodDecl *GetterMethod,
SourceLocation Loc) {
if (!GetterMethod)
return false;
QualType GetterType = GetterMethod->getReturnType().getNonReferenceType();
QualType PropertyRValueType =
property->getType().getNonReferenceType().getAtomicUnqualifiedType();
bool compat = Context.hasSameType(PropertyRValueType, GetterType);
if (!compat) {
const ObjCObjectPointerType *propertyObjCPtr = nullptr;
const ObjCObjectPointerType *getterObjCPtr = nullptr;
if ((propertyObjCPtr =
PropertyRValueType->getAs<ObjCObjectPointerType>()) &&
(getterObjCPtr = GetterType->getAs<ObjCObjectPointerType>()))
compat = Context.canAssignObjCInterfaces(getterObjCPtr, propertyObjCPtr);
else if (CheckAssignmentConstraints(Loc, GetterType, PropertyRValueType)
!= Compatible) {
Diag(Loc, diag::err_property_accessor_type)
<< property->getDeclName() << PropertyRValueType
<< GetterMethod->getSelector() << GetterType;
Diag(GetterMethod->getLocation(), diag::note_declared_at);
return true;
} else {
compat = true;
QualType lhsType = Context.getCanonicalType(PropertyRValueType);
QualType rhsType = Context.getCanonicalType(GetterType).getUnqualifiedType();
if (lhsType != rhsType && lhsType->isArithmeticType())
compat = false;
}
}
if (!compat) {
Diag(Loc, diag::warn_accessor_property_type_mismatch)
<< property->getDeclName()
<< GetterMethod->getSelector();
Diag(GetterMethod->getLocation(), diag::note_declared_at);
return true;
}
return false;
}
/// CollectImmediateProperties - This routine collects all properties in
/// the class and its conforming protocols, but not those in its super class.
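/// For example (sketch): given "@interface I : Super <P> ... @end", this
/// collects the properties declared in I, in I's visible extensions, and in
/// protocol P, but not those declared only in Super.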
static void
CollectImmediateProperties(ObjCContainerDecl *CDecl,
ObjCContainerDecl::PropertyMap &PropMap,
ObjCContainerDecl::PropertyMap &SuperPropMap,
bool CollectClassPropsOnly = false,
bool IncludeProtocols = true) {
if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(CDecl)) {
for (auto *Prop : IDecl->properties()) {
if (CollectClassPropsOnly && !Prop->isClassProperty())
continue;
PropMap[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] =
Prop;
}
// Collect the properties from visible extensions.
for (auto *Ext : IDecl->visible_extensions())
CollectImmediateProperties(Ext, PropMap, SuperPropMap,
CollectClassPropsOnly, IncludeProtocols);
if (IncludeProtocols) {
// Scan through class's protocols.
for (auto *PI : IDecl->all_referenced_protocols())
CollectImmediateProperties(PI, PropMap, SuperPropMap,
CollectClassPropsOnly);
}
}
if (ObjCCategoryDecl *CATDecl = dyn_cast<ObjCCategoryDecl>(CDecl)) {
for (auto *Prop : CATDecl->properties()) {
if (CollectClassPropsOnly && !Prop->isClassProperty())
continue;
PropMap[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] =
Prop;
}
if (IncludeProtocols) {
// Scan through class's protocols.
for (auto *PI : CATDecl->protocols())
CollectImmediateProperties(PI, PropMap, SuperPropMap,
CollectClassPropsOnly);
}
}
else if (ObjCProtocolDecl *PDecl = dyn_cast<ObjCProtocolDecl>(CDecl)) {
for (auto *Prop : PDecl->properties()) {
if (CollectClassPropsOnly && !Prop->isClassProperty())
continue;
ObjCPropertyDecl *PropertyFromSuper =
SuperPropMap[std::make_pair(Prop->getIdentifier(),
Prop->isClassProperty())];
// Exclude properties declared in protocols that the class's super-class
// also conforms to, as the super-class has to implement the property.
if (!PropertyFromSuper ||
PropertyFromSuper->getIdentifier() != Prop->getIdentifier()) {
ObjCPropertyDecl *&PropEntry =
PropMap[std::make_pair(Prop->getIdentifier(),
Prop->isClassProperty())];
if (!PropEntry)
PropEntry = Prop;
}
}
// Scan through protocol's protocols.
for (auto *PI : PDecl->protocols())
CollectImmediateProperties(PI, PropMap, SuperPropMap,
CollectClassPropsOnly);
}
}
/// CollectSuperClassPropertyImplementations - This routine collects the list
/// of properties to be implemented in the super class(es), including those
/// coming from their conforming protocols.
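/// For example (sketch): for "@interface Sub : Base <P> @end" where Base
/// adopts protocol Q, this walks Base and its ancestors, gathering the
/// properties each of them must implement (including those from Q).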
static void CollectSuperClassPropertyImplementations(ObjCInterfaceDecl *CDecl,
ObjCInterfaceDecl::PropertyMap &PropMap) {
if (ObjCInterfaceDecl *SDecl = CDecl->getSuperClass()) {
ObjCInterfaceDecl::PropertyDeclOrder PO;
while (SDecl) {
SDecl->collectPropertiesToImplement(PropMap, PO);
SDecl = SDecl->getSuperClass();
}
}
}
/// IvarBacksCurrentMethodAccessor - This routine returns 'true' if 'IV' is
/// an ivar synthesized for 'Method' and 'Method' is a property accessor
/// declared in class 'IFace'.
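/// For example (sketch): with "@property int count;" synthesized as
/// "@synthesize count = _count;", calling this with the getter "-count" and
/// the ivar "_count" returns true, since "_count" backs that accessor.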
bool
Sema::IvarBacksCurrentMethodAccessor(ObjCInterfaceDecl *IFace,
ObjCMethodDecl *Method, ObjCIvarDecl *IV) {
if (!IV->getSynthesize())
return false;
ObjCMethodDecl *IMD = IFace->lookupMethod(Method->getSelector(),
Method->isInstanceMethod());
if (!IMD || !IMD->isPropertyAccessor())
return false;
// Look up a property declaration, one of whose accessors is implemented
// by this method.
for (const auto *Property : IFace->instance_properties()) {
if ((Property->getGetterName() == IMD->getSelector() ||
Property->getSetterName() == IMD->getSelector()) &&
(Property->getPropertyIvarDecl() == IV))
return true;
}
// Also look up a property declaration in a class extension, one of whose
// accessors is implemented by this method.
for (const auto *Ext : IFace->known_extensions())
for (const auto *Property : Ext->instance_properties())
if ((Property->getGetterName() == IMD->getSelector() ||
Property->getSetterName() == IMD->getSelector()) &&
(Property->getPropertyIvarDecl() == IV))
return true;
return false;
}
static bool SuperClassImplementsProperty(ObjCInterfaceDecl *IDecl,
ObjCPropertyDecl *Prop) {
bool SuperClassImplementsGetter = false;
bool SuperClassImplementsSetter = false;
if (Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readonly)
SuperClassImplementsSetter = true;
while (IDecl->getSuperClass()) {
ObjCInterfaceDecl *SDecl = IDecl->getSuperClass();
if (!SuperClassImplementsGetter && SDecl->getInstanceMethod(Prop->getGetterName()))
SuperClassImplementsGetter = true;
if (!SuperClassImplementsSetter && SDecl->getInstanceMethod(Prop->getSetterName()))
SuperClassImplementsSetter = true;
if (SuperClassImplementsGetter && SuperClassImplementsSetter)
return true;
IDecl = IDecl->getSuperClass();
}
return false;
}
/// \brief Default-synthesizes all properties which must be synthesized
/// in the class's \@implementation.
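/// For example (sketch): under default synthesis, "@property int count;"
/// with no matching @synthesize/@dynamic in the @implementation behaves as
/// if "@synthesize count = _count;" had been written.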
void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl *IMPDecl,
ObjCInterfaceDecl *IDecl,
SourceLocation AtEnd) {
ObjCInterfaceDecl::PropertyMap PropMap;
ObjCInterfaceDecl::PropertyDeclOrder PropertyOrder;
IDecl->collectPropertiesToImplement(PropMap, PropertyOrder);
if (PropMap.empty())
return;
ObjCInterfaceDecl::PropertyMap SuperPropMap;
CollectSuperClassPropertyImplementations(IDecl, SuperPropMap);
for (unsigned i = 0, e = PropertyOrder.size(); i != e; i++) {
ObjCPropertyDecl *Prop = PropertyOrder[i];
// Is there a matching property synthesize/dynamic?
if (Prop->isInvalidDecl() ||
Prop->isClassProperty() ||
Prop->getPropertyImplementation() == ObjCPropertyDecl::Optional)
continue;
// Property may have been synthesized by user.
if (IMPDecl->FindPropertyImplDecl(
Prop->getIdentifier(), Prop->getQueryKind()))
continue;
if (IMPDecl->getInstanceMethod(Prop->getGetterName())) {
if (Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readonly)
continue;
if (IMPDecl->getInstanceMethod(Prop->getSetterName()))
continue;
}
if (ObjCPropertyImplDecl *PID =
IMPDecl->FindPropertyImplIvarDecl(Prop->getIdentifier())) {
Diag(Prop->getLocation(), diag::warn_no_autosynthesis_shared_ivar_property)
<< Prop->getIdentifier();
if (PID->getLocation().isValid())
Diag(PID->getLocation(), diag::note_property_synthesize);
continue;
}
ObjCPropertyDecl *PropInSuperClass =
SuperPropMap[std::make_pair(Prop->getIdentifier(),
Prop->isClassProperty())];
if (ObjCProtocolDecl *Proto =
dyn_cast<ObjCProtocolDecl>(Prop->getDeclContext())) {
// We won't auto-synthesize properties declared in protocols.
// Suppress the warning if the class's superclass implements the property's
// getter and, for readwrite properties, its setter, or if the property is
// going to be implemented in its super class.
if (!SuperClassImplementsProperty(IDecl, Prop) && !PropInSuperClass) {
Diag(IMPDecl->getLocation(),
diag::warn_auto_synthesizing_protocol_property)
<< Prop << Proto;
Diag(Prop->getLocation(), diag::note_property_declare);
std::string FixIt =
(Twine("@synthesize ") + Prop->getName() + ";\n\n").str();
Diag(AtEnd, diag::note_add_synthesize_directive)
<< FixItHint::CreateInsertion(AtEnd, FixIt);
}
continue;
}
// If the property is to be implemented in the super class, ignore it.
if (PropInSuperClass) {
if ((Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readwrite) &&
(PropInSuperClass->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_readonly) &&
!IMPDecl->getInstanceMethod(Prop->getSetterName()) &&
!IDecl->HasUserDeclaredSetterMethod(Prop)) {
Diag(Prop->getLocation(), diag::warn_no_autosynthesis_property)
<< Prop->getIdentifier();
Diag(PropInSuperClass->getLocation(), diag::note_property_declare);
}
else {
Diag(Prop->getLocation(), diag::warn_autosynthesis_property_in_superclass)
<< Prop->getIdentifier();
Diag(PropInSuperClass->getLocation(), diag::note_property_declare);
Diag(IMPDecl->getLocation(), diag::note_while_in_implementation);
}
continue;
}
// We use invalid SourceLocations for the synthesized ivars since they
// aren't really synthesized at a particular location; they just exist.
// Saying that they are located at the @implementation isn't really going
// to help users.
ObjCPropertyImplDecl *PIDecl = dyn_cast_or_null<ObjCPropertyImplDecl>(
ActOnPropertyImplDecl(S, SourceLocation(), SourceLocation(),
true,
/* property = */ Prop->getIdentifier(),
/* ivar = */ Prop->getDefaultSynthIvarName(Context),
Prop->getLocation(), Prop->getQueryKind()));
if (PIDecl) {
Diag(Prop->getLocation(), diag::warn_missing_explicit_synthesis);
Diag(IMPDecl->getLocation(), diag::note_while_in_implementation);
}
}
}
void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D,
SourceLocation AtEnd) {
if (!LangOpts.ObjCDefaultSynthProperties || LangOpts.ObjCRuntime.isFragile())
return;
ObjCImplementationDecl *IC = dyn_cast_or_null<ObjCImplementationDecl>(D);
if (!IC)
return;
if (ObjCInterfaceDecl* IDecl = IC->getClassInterface())
if (!IDecl->isObjCRequiresPropertyDefs())
DefaultSynthesizeProperties(S, IC, IDecl, AtEnd);
}
static void DiagnoseUnimplementedAccessor(
Sema &S, ObjCInterfaceDecl *PrimaryClass, Selector Method,
ObjCImplDecl *IMPDecl, ObjCContainerDecl *CDecl, ObjCCategoryDecl *C,
ObjCPropertyDecl *Prop,
llvm::SmallPtrSet<const ObjCMethodDecl *, 8> &SMap) {
// Check to see if we have a corresponding selector in SMap with the
// right method type.
auto I = std::find_if(SMap.begin(), SMap.end(),
[&](const ObjCMethodDecl *x) {
return x->getSelector() == Method &&
x->isClassMethod() == Prop->isClassProperty();
});
// When reporting a missing property setter/getter implementation in
// a category, do not report it when the accessor is declared in the
// primary class, the class's protocol, or one of its super classes,
// because the class is going to implement it.
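// For example (sketch): if a category on a hypothetical class "Widget"
// adopts a protocol declaring "@property int size;", no warning is emitted
// for the missing accessors when "Widget" itself declares "size", since
// "Widget" is expected to implement them.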
if (I == SMap.end() &&
(PrimaryClass == nullptr ||
!PrimaryClass->lookupPropertyAccessor(Method, C,
Prop->isClassProperty()))) {
unsigned diag =
isa<ObjCCategoryDecl>(CDecl)
? (Prop->isClassProperty()
? diag::warn_impl_required_in_category_for_class_property
: diag::warn_setter_getter_impl_required_in_category)
: (Prop->isClassProperty()
? diag::warn_impl_required_for_class_property
: diag::warn_setter_getter_impl_required);
S.Diag(IMPDecl->getLocation(), diag) << Prop->getDeclName() << Method;
S.Diag(Prop->getLocation(), diag::note_property_declare);
if (S.LangOpts.ObjCDefaultSynthProperties &&
S.LangOpts.ObjCRuntime.isNonFragile())
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(CDecl))
if (const ObjCInterfaceDecl *RID = ID->isObjCRequiresPropertyDefs())
S.Diag(RID->getLocation(), diag::note_suppressed_class_declare);
}
}
void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
ObjCContainerDecl *CDecl,
bool SynthesizeProperties) {
ObjCContainerDecl::PropertyMap PropMap;
ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(CDecl);
// Since we don't synthesize class properties, we should emit a diagnostic
// even if SynthesizeProperties is true.
ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
// Gather properties which need not be implemented in this class
// or category.
if (!IDecl)
if (ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl)) {
// For categories, no need to implement properties declared in
// its primary class (and its super classes) if property is
// declared in one of those containers.
if ((IDecl = C->getClassInterface())) {
ObjCInterfaceDecl::PropertyDeclOrder PO;
IDecl->collectPropertiesToImplement(NoNeedToImplPropMap, PO);
}
}
if (IDecl)
CollectSuperClassPropertyImplementations(IDecl, NoNeedToImplPropMap);
// When SynthesizeProperties is true, we only check class properties.
CollectImmediateProperties(CDecl, PropMap, NoNeedToImplPropMap,
SynthesizeProperties/*CollectClassPropsOnly*/);
// Scan the @interface to see if any of the protocols it adopts
// require an explicit implementation, via attribute
// 'objc_protocol_requires_explicit_implementation'.
if (IDecl) {
std::unique_ptr<ObjCContainerDecl::PropertyMap> LazyMap;
for (auto *PDecl : IDecl->all_referenced_protocols()) {
if (!PDecl->hasAttr<ObjCExplicitProtocolImplAttr>())
continue;
// Lazily construct a set of all the properties in the @interface
// of the class, without looking at the superclass. We cannot
// use the call to CollectImmediateProperties() above as that
// utilizes information from the super class's properties as well
// as scans the adopted protocols. This work only triggers for protocols
// with the attribute, which is very rare, and only occurs when
// analyzing the @implementation.
if (!LazyMap) {
ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
LazyMap.reset(new ObjCContainerDecl::PropertyMap());
CollectImmediateProperties(CDecl, *LazyMap, NoNeedToImplPropMap,
/* CollectClassPropsOnly */ false,
/* IncludeProtocols */ false);
}
// Add the properties of 'PDecl' to the list of properties that
// need to be implemented.
for (auto *PropDecl : PDecl->properties()) {
if ((*LazyMap)[std::make_pair(PropDecl->getIdentifier(),
PropDecl->isClassProperty())])
continue;
PropMap[std::make_pair(PropDecl->getIdentifier(),
PropDecl->isClassProperty())] = PropDecl;
}
}
}
if (PropMap.empty())
return;
llvm::DenseSet<ObjCPropertyDecl *> PropImplMap;
for (const auto *I : IMPDecl->property_impls())
PropImplMap.insert(I->getPropertyDecl());
llvm::SmallPtrSet<const ObjCMethodDecl *, 8> InsMap;
// Collect property accessors implemented in current implementation.
for (const auto *I : IMPDecl->methods())
InsMap.insert(I);
ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl);
ObjCInterfaceDecl *PrimaryClass = nullptr;
if (C && !C->IsClassExtension())
if ((PrimaryClass = C->getClassInterface()))
// Report unimplemented properties in the category as well.
if (ObjCImplDecl *IMP = PrimaryClass->getImplementation()) {
// When reporting on missing setters/getters, do not report when the
// setter/getter is implemented in the category's primary class
// implementation.
for (const auto *I : IMP->methods())
InsMap.insert(I);
}
for (ObjCContainerDecl::PropertyMap::iterator
P = PropMap.begin(), E = PropMap.end(); P != E; ++P) {
ObjCPropertyDecl *Prop = P->second;
// Is there a matching property synthesize/dynamic?
if (Prop->isInvalidDecl() ||
Prop->getPropertyImplementation() == ObjCPropertyDecl::Optional ||
PropImplMap.count(Prop) ||
Prop->getAvailability() == AR_Unavailable)
continue;
// Diagnose unimplemented getters and setters.
DiagnoseUnimplementedAccessor(*this,
PrimaryClass, Prop->getGetterName(), IMPDecl, CDecl, C, Prop, InsMap);
if (!Prop->isReadOnly())
DiagnoseUnimplementedAccessor(*this,
PrimaryClass, Prop->getSetterName(),
IMPDecl, CDecl, C, Prop, InsMap);
}
}
void Sema::diagnoseNullResettableSynthesizedSetters(const ObjCImplDecl *impDecl) {
for (const auto *propertyImpl : impDecl->property_impls()) {
const auto *property = propertyImpl->getPropertyDecl();
// Warn about null_resettable properties with synthesized setters,
// because the setter won't properly handle nil.
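// For example (sketch): "@property (null_resettable, copy) NSString *name;"
// with a plain "@synthesize name;" and no custom setter or getter in the
// @implementation triggers this warning, since assigning nil to a
// null_resettable property is supposed to reset it to a default value.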
if (propertyImpl->getPropertyImplementation()
== ObjCPropertyImplDecl::Synthesize &&
(property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) &&
property->getGetterMethodDecl() &&
property->getSetterMethodDecl()) {
auto *getterMethod = property->getGetterMethodDecl();
auto *setterMethod = property->getSetterMethodDecl();
if (!impDecl->getInstanceMethod(setterMethod->getSelector()) &&
!impDecl->getInstanceMethod(getterMethod->getSelector())) {
SourceLocation loc = propertyImpl->getLocation();
if (loc.isInvalid())
loc = impDecl->getLocStart();
Diag(loc, diag::warn_null_resettable_setter)
<< setterMethod->getSelector() << property->getDeclName();
}
}
}
}
void
Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl,
ObjCInterfaceDecl* IDecl) {
// Rules apply in non-GC mode only
if (getLangOpts().getGC() != LangOptions::NonGC)
return;
ObjCContainerDecl::PropertyMap PM;
for (auto *Prop : IDecl->properties())
PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
for (const auto *Ext : IDecl->known_extensions())
for (auto *Prop : Ext->properties())
PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
for (ObjCContainerDecl::PropertyMap::iterator I = PM.begin(), E = PM.end();
I != E; ++I) {
const ObjCPropertyDecl *Property = I->second;
ObjCMethodDecl *GetterMethod = nullptr;
ObjCMethodDecl *SetterMethod = nullptr;
bool LookedUpGetterSetter = false;
unsigned Attributes = Property->getPropertyAttributes();
unsigned AttributesAsWritten = Property->getPropertyAttributesAsWritten();
if (!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_atomic) &&
!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_nonatomic)) {
GetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getGetterName()) :
IMPDecl->getInstanceMethod(Property->getGetterName());
SetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getSetterName()) :
IMPDecl->getInstanceMethod(Property->getSetterName());
LookedUpGetterSetter = true;
if (GetterMethod) {
Diag(GetterMethod->getLocation(),
diag::warn_default_atomic_custom_getter_setter)
<< Property->getIdentifier() << 0;
Diag(Property->getLocation(), diag::note_property_declare);
}
if (SetterMethod) {
Diag(SetterMethod->getLocation(),
diag::warn_default_atomic_custom_getter_setter)
<< Property->getIdentifier() << 1;
Diag(Property->getLocation(), diag::note_property_declare);
}
}
// We only care about readwrite atomic properties.
if ((Attributes & ObjCPropertyDecl::OBJC_PR_nonatomic) ||
!(Attributes & ObjCPropertyDecl::OBJC_PR_readwrite))
continue;
if (const ObjCPropertyImplDecl *PIDecl = IMPDecl->FindPropertyImplDecl(
Property->getIdentifier(), Property->getQueryKind())) {
if (PIDecl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
continue;
if (!LookedUpGetterSetter) {
GetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getGetterName()) :
IMPDecl->getInstanceMethod(Property->getGetterName());
SetterMethod = Property->isClassProperty() ?
IMPDecl->getClassMethod(Property->getSetterName()) :
IMPDecl->getInstanceMethod(Property->getSetterName());
}
if ((GetterMethod && !SetterMethod) || (!GetterMethod && SetterMethod)) {
SourceLocation MethodLoc =
(GetterMethod ? GetterMethod->getLocation()
: SetterMethod->getLocation());
Diag(MethodLoc, diag::warn_atomic_property_rule)
<< Property->getIdentifier() << (GetterMethod != nullptr)
<< (SetterMethod != nullptr);
// Emit a fixit suggesting 'nonatomic'.
if (Property->getLParenLoc().isValid() &&
!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_atomic)) {
// @property () ... case.
SourceLocation AfterLParen =
getLocForEndOfToken(Property->getLParenLoc());
StringRef NonatomicStr = AttributesAsWritten ? "nonatomic, "
: "nonatomic";
Diag(Property->getLocation(),
diag::note_atomic_property_fixup_suggest)
<< FixItHint::CreateInsertion(AfterLParen, NonatomicStr);
} else if (Property->getLParenLoc().isInvalid()) {
// The '@property id ...' (no attribute list) case.
SourceLocation startLoc =
Property->getTypeSourceInfo()->getTypeLoc().getBeginLoc();
Diag(Property->getLocation(),
diag::note_atomic_property_fixup_suggest)
<< FixItHint::CreateInsertion(startLoc, "(nonatomic) ");
}
else
Diag(MethodLoc, diag::note_atomic_property_fixup_suggest);
Diag(Property->getLocation(), diag::note_property_declare);
}
}
}
}
void Sema::DiagnoseOwningPropertyGetterSynthesis(const ObjCImplementationDecl *D) {
if (getLangOpts().getGC() == LangOptions::GCOnly)
return;
for (const auto *PID : D->property_impls()) {
const ObjCPropertyDecl *PD = PID->getPropertyDecl();
if (PD && !PD->hasAttr<NSReturnsNotRetainedAttr>() &&
!PD->isClassProperty() &&
!D->getInstanceMethod(PD->getGetterName())) {
ObjCMethodDecl *method = PD->getGetterMethodDecl();
if (!method)
continue;
ObjCMethodFamily family = method->getMethodFamily();
if (family == OMF_alloc || family == OMF_copy ||
family == OMF_mutableCopy || family == OMF_new) {
if (getLangOpts().ObjCAutoRefCount)
Diag(PD->getLocation(), diag::err_cocoa_naming_owned_rule);
else
Diag(PD->getLocation(), diag::warn_cocoa_naming_owned_rule);
// Look for a getter explicitly declared alongside the property.
// If we find one, use its location for the note.
SourceLocation noteLoc = PD->getLocation();
SourceLocation fixItLoc;
for (auto *getterRedecl : method->redecls()) {
if (getterRedecl->isImplicit())
continue;
if (getterRedecl->getDeclContext() != PD->getDeclContext())
continue;
noteLoc = getterRedecl->getLocation();
fixItLoc = getterRedecl->getLocEnd();
}
Preprocessor &PP = getPreprocessor();
TokenValue tokens[] = {
tok::kw___attribute, tok::l_paren, tok::l_paren,
PP.getIdentifierInfo("objc_method_family"), tok::l_paren,
PP.getIdentifierInfo("none"), tok::r_paren,
tok::r_paren, tok::r_paren
};
StringRef spelling = "__attribute__((objc_method_family(none)))";
StringRef macroName = PP.getLastMacroWithSpelling(noteLoc, tokens);
if (!macroName.empty())
spelling = macroName;
auto noteDiag = Diag(noteLoc, diag::note_cocoa_naming_declare_family)
<< method->getDeclName() << spelling;
if (fixItLoc.isValid()) {
SmallString<64> fixItText(" ");
fixItText += spelling;
noteDiag << FixItHint::CreateInsertion(fixItLoc, fixItText);
}
}
}
}
}
void Sema::DiagnoseMissingDesignatedInitOverrides(
const ObjCImplementationDecl *ImplD,
const ObjCInterfaceDecl *IFD) {
assert(IFD->hasDesignatedInitializers());
const ObjCInterfaceDecl *SuperD = IFD->getSuperClass();
if (!SuperD)
return;
SelectorSet InitSelSet;
for (const auto *I : ImplD->instance_methods())
if (I->getMethodFamily() == OMF_init)
InitSelSet.insert(I->getSelector());
SmallVector<const ObjCMethodDecl *, 8> DesignatedInits;
SuperD->getDesignatedInitializers(DesignatedInits);
for (SmallVector<const ObjCMethodDecl *, 8>::iterator
I = DesignatedInits.begin(), E = DesignatedInits.end(); I != E; ++I) {
const ObjCMethodDecl *MD = *I;
if (!InitSelSet.count(MD->getSelector())) {
bool Ignore = false;
if (auto *IMD = IFD->getInstanceMethod(MD->getSelector())) {
Ignore = IMD->isUnavailable();
}
if (!Ignore) {
Diag(ImplD->getLocation(),
diag::warn_objc_implementation_missing_designated_init_override)
<< MD->getSelector();
Diag(MD->getLocation(), diag::note_objc_designated_init_marked_here);
}
}
}
}
/// AddPropertyAttrs - Propagates attributes from a property to the
/// implicitly-declared getter or setter for that property.
static void AddPropertyAttrs(Sema &S, ObjCMethodDecl *PropertyMethod,
ObjCPropertyDecl *Property) {
// Should we just clone all attributes over?
for (const auto *A : Property->attrs()) {
if (isa<DeprecatedAttr>(A) ||
isa<UnavailableAttr>(A) ||
isa<AvailabilityAttr>(A))
PropertyMethod->addAttr(A->clone(S.Context));
}
}
/// ProcessPropertyDecl - Make sure that any user-defined setter/getter methods
/// have the property type and issue diagnostics if they don't.
/// Also synthesize a getter/setter method if none exist (and update the
/// appropriate lookup tables).
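/// For example (sketch): "@property int count;" implicitly declares
/// "- (int)count;" and "- (void)setCount:(int)count;" here unless the user
/// already declared methods with those selectors.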
void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
ObjCMethodDecl *GetterMethod, *SetterMethod;
ObjCContainerDecl *CD = cast<ObjCContainerDecl>(property->getDeclContext());
if (CD->isInvalidDecl())
return;
bool IsClassProperty = property->isClassProperty();
GetterMethod = IsClassProperty ?
CD->getClassMethod(property->getGetterName()) :
CD->getInstanceMethod(property->getGetterName());
// If the setter or getter is not found in the class extension, it might
// be in the primary class.
if (!GetterMethod)
if (const ObjCCategoryDecl *CatDecl = dyn_cast<ObjCCategoryDecl>(CD))
if (CatDecl->IsClassExtension())
GetterMethod = IsClassProperty ? CatDecl->getClassInterface()->
getClassMethod(property->getGetterName()) :
CatDecl->getClassInterface()->
getInstanceMethod(property->getGetterName());
SetterMethod = IsClassProperty ?
CD->getClassMethod(property->getSetterName()) :
CD->getInstanceMethod(property->getSetterName());
if (!SetterMethod)
if (const ObjCCategoryDecl *CatDecl = dyn_cast<ObjCCategoryDecl>(CD))
if (CatDecl->IsClassExtension())
SetterMethod = IsClassProperty ? CatDecl->getClassInterface()->
getClassMethod(property->getSetterName()) :
CatDecl->getClassInterface()->
getInstanceMethod(property->getSetterName());
DiagnosePropertyAccessorMismatch(property, GetterMethod,
property->getLocation());
if (!property->isReadOnly() && SetterMethod) {
if (Context.getCanonicalType(SetterMethod->getReturnType()) !=
Context.VoidTy)
Diag(SetterMethod->getLocation(), diag::err_setter_type_void);
if (SetterMethod->param_size() != 1 ||
!Context.hasSameUnqualifiedType(
(*SetterMethod->param_begin())->getType().getNonReferenceType(),
property->getType().getNonReferenceType())) {
Diag(property->getLocation(),
diag::warn_accessor_property_type_mismatch)
<< property->getDeclName()
<< SetterMethod->getSelector();
Diag(SetterMethod->getLocation(), diag::note_declared_at);
}
}
// Synthesize getter/setter methods if none exist.
// Find the default getter and, if none is found, add one.
// FIXME: The synthesized property we set here is misleading. We almost always
// synthesize these methods unless the user explicitly provided prototypes
// (which is odd, but allowed). Sema should be typechecking that the
// declarations agree in that situation (which it currently does not).
if (!GetterMethod) {
// No instance/class method with the same name as the property getter was
// found. Declare a getter method and add it to the list of methods for
// this class.
SourceLocation Loc = property->getLocation();
// The getter returns the declared property type with all qualifiers
// removed.
QualType resultTy = property->getType().getAtomicUnqualifiedType();
// If the property is null_resettable, the getter returns nonnull.
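// For example (sketch): for "@property (null_resettable) NSString *name;"
// the synthesized getter's return type becomes "NSString * _Nonnull".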
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) {
QualType modifiedTy = resultTy;
if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)) {
if (*nullability == NullabilityKind::Unspecified)
resultTy = Context.getAttributedType(AttributedType::attr_nonnull,
modifiedTy, modifiedTy);
}
}
GetterMethod = ObjCMethodDecl::Create(Context, Loc, Loc,
property->getGetterName(),
resultTy, nullptr, CD,
!IsClassProperty, /*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true, /*isDefined=*/false,
(property->getPropertyImplementation() ==
ObjCPropertyDecl::Optional) ?
ObjCMethodDecl::Optional :
ObjCMethodDecl::Required);
CD->addDecl(GetterMethod);
AddPropertyAttrs(*this, GetterMethod, property);
if (property->hasAttr<NSReturnsNotRetainedAttr>())
GetterMethod->addAttr(NSReturnsNotRetainedAttr::CreateImplicit(Context,
Loc));
if (property->hasAttr<ObjCReturnsInnerPointerAttr>())
GetterMethod->addAttr(
ObjCReturnsInnerPointerAttr::CreateImplicit(Context, Loc));
if (const SectionAttr *SA = property->getAttr<SectionAttr>())
GetterMethod->addAttr(
SectionAttr::CreateImplicit(Context, SectionAttr::GNU_section,
SA->getName(), Loc));
if (getLangOpts().ObjCAutoRefCount)
CheckARCMethodDecl(GetterMethod);
} else
// A user-declared getter will be marked synthesized when @synthesize of
// the property with the same name is seen in the @implementation.
GetterMethod->setPropertyAccessor(true);
property->setGetterMethodDecl(GetterMethod);
// Skip setter if property is read-only.
if (!property->isReadOnly()) {
// Find the default setter and, if none is found, add one.
if (!SetterMethod) {
// No instance/class method with the same name as the property setter was
// found. Declare a setter method and add it to the list of methods for
// this class.
SourceLocation Loc = property->getLocation();
SetterMethod =
ObjCMethodDecl::Create(Context, Loc, Loc,
property->getSetterName(), Context.VoidTy,
nullptr, CD, !IsClassProperty,
/*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true,
/*isDefined=*/false,
(property->getPropertyImplementation() ==
ObjCPropertyDecl::Optional) ?
ObjCMethodDecl::Optional :
ObjCMethodDecl::Required);
// Remove all qualifiers from the setter's parameter type.
QualType paramTy =
property->getType().getUnqualifiedType().getAtomicUnqualifiedType();
// If the property is null_resettable, the setter accepts a
// nullable value.
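// For example (sketch): for "@property (null_resettable) NSString *name;"
// the synthesized setter takes an "NSString * _Nullable" parameter.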
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) {
QualType modifiedTy = paramTy;
if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)){
if (*nullability == NullabilityKind::Unspecified)
paramTy = Context.getAttributedType(AttributedType::attr_nullable,
modifiedTy, modifiedTy);
}
}
// Invent the arguments for the setter. We don't bother making a
// nice name for the argument.
ParmVarDecl *Argument = ParmVarDecl::Create(Context, SetterMethod,
Loc, Loc,
property->getIdentifier(),
paramTy,
/*TInfo=*/nullptr,
SC_None,
nullptr);
SetterMethod->setMethodParams(Context, Argument, None);
AddPropertyAttrs(*this, SetterMethod, property);
CD->addDecl(SetterMethod);
if (const SectionAttr *SA = property->getAttr<SectionAttr>())
SetterMethod->addAttr(
SectionAttr::CreateImplicit(Context, SectionAttr::GNU_section,
SA->getName(), Loc));
// It's possible for the user to have set a very odd custom
// setter selector that causes it to have a method family.
if (getLangOpts().ObjCAutoRefCount)
CheckARCMethodDecl(SetterMethod);
} else
// A user-declared setter will be marked synthesized when @synthesize of
// the property with the same name is seen in the @implementation.
SetterMethod->setPropertyAccessor(true);
property->setSetterMethodDecl(SetterMethod);
}
// Add any synthesized methods to the global pool. This allows us to
// handle the following, which is supported by GCC (and part of the design).
//
// @interface Foo
// @property double bar;
// @end
//
// void thisIsUnfortunate() {
// id foo;
// double bar = [foo bar];
// }
//
if (!IsClassProperty) {
if (GetterMethod)
AddInstanceMethodToGlobalPool(GetterMethod);
if (SetterMethod)
AddInstanceMethodToGlobalPool(SetterMethod);
} else {
if (GetterMethod)
AddFactoryMethodToGlobalPool(GetterMethod);
if (SetterMethod)
AddFactoryMethodToGlobalPool(SetterMethod);
}
ObjCInterfaceDecl *CurrentClass = dyn_cast<ObjCInterfaceDecl>(CD);
if (!CurrentClass) {
if (ObjCCategoryDecl *Cat = dyn_cast<ObjCCategoryDecl>(CD))
CurrentClass = Cat->getClassInterface();
else if (ObjCImplDecl *Impl = dyn_cast<ObjCImplDecl>(CD))
CurrentClass = Impl->getClassInterface();
}
if (GetterMethod)
CheckObjCMethodOverrides(GetterMethod, CurrentClass, Sema::RTC_Unknown);
if (SetterMethod)
CheckObjCMethodOverrides(SetterMethod, CurrentClass, Sema::RTC_Unknown);
}
void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
SourceLocation Loc,
unsigned &Attributes,
bool propertyInPrimaryClass) {
// FIXME: Improve the reported location.
if (!PDecl || PDecl->isInvalidDecl())
return;
if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
(Attributes & ObjCDeclSpec::DQ_PR_readwrite))
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "readonly" << "readwrite";
ObjCPropertyDecl *PropertyDecl = cast<ObjCPropertyDecl>(PDecl);
QualType PropertyTy = PropertyDecl->getType();
// Check for copy or retain on non-object types.
if ((Attributes & (ObjCDeclSpec::DQ_PR_weak | ObjCDeclSpec::DQ_PR_copy |
ObjCDeclSpec::DQ_PR_retain | ObjCDeclSpec::DQ_PR_strong)) &&
!PropertyTy->isObjCRetainableType() &&
!PropertyDecl->hasAttr<ObjCNSObjectAttr>()) {
Diag(Loc, diag::err_objc_property_requires_object)
<< (Attributes & ObjCDeclSpec::DQ_PR_weak ? "weak" :
Attributes & ObjCDeclSpec::DQ_PR_copy ? "copy" : "retain (or strong)");
Attributes &= ~(ObjCDeclSpec::DQ_PR_weak | ObjCDeclSpec::DQ_PR_copy |
ObjCDeclSpec::DQ_PR_retain | ObjCDeclSpec::DQ_PR_strong);
PropertyDecl->setInvalidDecl();
}
// Check for more than one of { assign, copy, retain }.
if (Attributes & ObjCDeclSpec::DQ_PR_assign) {
if (Attributes & ObjCDeclSpec::DQ_PR_copy) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "copy";
Attributes &= ~ObjCDeclSpec::DQ_PR_copy;
}
if (Attributes & ObjCDeclSpec::DQ_PR_retain) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "retain";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
if (Attributes & ObjCDeclSpec::DQ_PR_strong) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "strong";
Attributes &= ~ObjCDeclSpec::DQ_PR_strong;
}
if (getLangOpts().ObjCAutoRefCount &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "assign" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
if (PropertyDecl->hasAttr<IBOutletCollectionAttr>())
Diag(Loc, diag::warn_iboutletcollection_property_assign);
} else if (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained) {
if (Attributes & ObjCDeclSpec::DQ_PR_copy) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "copy";
Attributes &= ~ObjCDeclSpec::DQ_PR_copy;
}
if (Attributes & ObjCDeclSpec::DQ_PR_retain) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "retain";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
if (Attributes & ObjCDeclSpec::DQ_PR_strong) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "strong";
Attributes &= ~ObjCDeclSpec::DQ_PR_strong;
}
if (getLangOpts().ObjCAutoRefCount &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "unsafe_unretained" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
} else if (Attributes & ObjCDeclSpec::DQ_PR_copy) {
if (Attributes & ObjCDeclSpec::DQ_PR_retain) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "copy" << "retain";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
if (Attributes & ObjCDeclSpec::DQ_PR_strong) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "copy" << "strong";
Attributes &= ~ObjCDeclSpec::DQ_PR_strong;
}
if (Attributes & ObjCDeclSpec::DQ_PR_weak) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "copy" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
}
else if ((Attributes & ObjCDeclSpec::DQ_PR_retain) &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "retain" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_retain;
}
else if ((Attributes & ObjCDeclSpec::DQ_PR_strong) &&
(Attributes & ObjCDeclSpec::DQ_PR_weak)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "strong" << "weak";
Attributes &= ~ObjCDeclSpec::DQ_PR_weak;
}
if (Attributes & ObjCDeclSpec::DQ_PR_weak) {
// 'weak' and 'nonnull' are mutually exclusive.
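// For example (sketch): "@property (weak, nonnull) id delegate;" is
// rejected here with err_objc_property_attr_mutually_exclusive.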
if (auto nullability = PropertyTy->getNullability(Context)) {
if (*nullability == NullabilityKind::NonNull)
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "nonnull" << "weak";
}
}
if ((Attributes & ObjCDeclSpec::DQ_PR_atomic) &&
(Attributes & ObjCDeclSpec::DQ_PR_nonatomic)) {
Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
<< "atomic" << "nonatomic";
Attributes &= ~ObjCDeclSpec::DQ_PR_atomic;
}
// Warn if the user supplied no assignment attribute, the property is
// readwrite, and this is an object type.
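// For example (sketch): in manual retain/release mode, a bare
// "@property NSString *name;" in the primary class warns that no ownership
// attribute ('assign', 'retain', or 'copy') was specified and that a plain
// assignment is likely inappropriate for an object type.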
if (!getOwnershipRule(Attributes) && PropertyTy->isObjCRetainableType()) {
if (Attributes & ObjCDeclSpec::DQ_PR_readonly) {
// do nothing
} else if (getLangOpts().ObjCAutoRefCount) {
// With arc, @property definitions should default to strong when
// not specified.
PropertyDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
} else if (PropertyTy->isObjCObjectPointerType()) {
bool isAnyClassTy =
(PropertyTy->isObjCClassType() ||
PropertyTy->isObjCQualifiedClassType());
// In non-GC, non-ARC mode, 'Class' is treated as a 'void *'; no need to
// issue any warning.
if (isAnyClassTy && getLangOpts().getGC() == LangOptions::NonGC)
;
else if (propertyInPrimaryClass) {
// Don't issue a warning on a property with no lifetime in a class
// extension, as it is inherited from the property in the primary class.
// Skip this warning in GC-only mode.
if (getLangOpts().getGC() != LangOptions::GCOnly)
Diag(Loc, diag::warn_objc_property_no_assignment_attribute);
// In non-GC code, warn that this is likely inappropriate.
if (getLangOpts().getGC() == LangOptions::NonGC)
Diag(Loc, diag::warn_objc_property_default_assign_on_object);
}
}
// FIXME: Implement warning dependent on NSCopying being
// implemented. See also:
// <rdar://5168496&4855821&5607453&5096644&4947311&5698469&4947014&5168496>
// (please trim this list while you are at it).
}
if (!(Attributes & ObjCDeclSpec::DQ_PR_copy)
&&!(Attributes & ObjCDeclSpec::DQ_PR_readonly)
&& getLangOpts().getGC() == LangOptions::GCOnly
&& PropertyTy->isBlockPointerType())
Diag(Loc, diag::warn_objc_property_copy_missing_on_block);
else if ((Attributes & ObjCDeclSpec::DQ_PR_retain) &&
!(Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
!(Attributes & ObjCDeclSpec::DQ_PR_strong) &&
PropertyTy->isBlockPointerType())
Diag(Loc, diag::warn_objc_property_retain_of_block);
if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
(Attributes & ObjCDeclSpec::DQ_PR_setter))
Diag(Loc, diag::warn_objc_readonly_property_has_setter);
}
Index: head/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp (revision 322855)
@@ -1,4196 +1,4202 @@
//===--- ASTReaderDecl.cpp - Decl Deserialization ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTReader::ReadDeclRecord method, which is the
// entrypoint for loading a decl.
//
//===----------------------------------------------------------------------===//
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclGroup.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/Sema/IdentifierResolver.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Serialization/ASTReader.h"
#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
using namespace clang::serialization;
//===----------------------------------------------------------------------===//
// Declaration deserialization
//===----------------------------------------------------------------------===//
namespace clang {
class ASTDeclReader : public DeclVisitor<ASTDeclReader, void> {
ASTReader &Reader;
ASTRecordReader &Record;
ASTReader::RecordLocation Loc;
const DeclID ThisDeclID;
const SourceLocation ThisDeclLoc;
typedef ASTReader::RecordData RecordData;
TypeID TypeIDForTypeDecl;
unsigned AnonymousDeclNumber;
GlobalDeclID NamedDeclForTagDecl;
IdentifierInfo *TypedefNameForLinkage;
bool HasPendingBody;
///\brief A flag recording that the entity of this decl is used. We use it
/// to delay the marking of the canonical decl as used until the entire
/// declaration is deserialized and merged.
bool IsDeclMarkedUsed;
uint64_t GetCurrentCursorOffset();
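// Offsets are serialized relative to the current record so that they stay
// small, with 0 reserved to mean "no offset". A sketch of the decoding
// done below: a stored LocalOffset L maps to the absolute offset
// (Loc.Offset - L), or to 0 when L is 0.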
uint64_t ReadLocalOffset() {
uint64_t LocalOffset = Record.readInt();
assert(LocalOffset < Loc.Offset && "offset points after current record");
return LocalOffset ? Loc.Offset - LocalOffset : 0;
}
uint64_t ReadGlobalOffset() {
uint64_t Local = ReadLocalOffset();
return Local ? Record.getGlobalBitOffset(Local) : 0;
}
SourceLocation ReadSourceLocation() {
return Record.readSourceLocation();
}
SourceRange ReadSourceRange() {
return Record.readSourceRange();
}
TypeSourceInfo *GetTypeSourceInfo() {
return Record.getTypeSourceInfo();
}
serialization::DeclID ReadDeclID() {
return Record.readDeclID();
}
std::string ReadString() {
return Record.readString();
}
void ReadDeclIDList(SmallVectorImpl<DeclID> &IDs) {
for (unsigned I = 0, Size = Record.readInt(); I != Size; ++I)
IDs.push_back(ReadDeclID());
}
Decl *ReadDecl() {
return Record.readDecl();
}
template<typename T>
T *ReadDeclAs() {
return Record.readDeclAs<T>();
}
void ReadQualifierInfo(QualifierInfo &Info) {
Record.readQualifierInfo(Info);
}
void ReadDeclarationNameLoc(DeclarationNameLoc &DNLoc, DeclarationName Name) {
Record.readDeclarationNameLoc(DNLoc, Name);
}
serialization::SubmoduleID readSubmoduleID() {
if (Record.getIdx() == Record.size())
return 0;
return Record.getGlobalSubmoduleID(Record.readInt());
}
Module *readModule() {
return Record.getSubmodule(readSubmoduleID());
}
void ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update);
void ReadCXXDefinitionData(struct CXXRecordDecl::DefinitionData &Data,
const CXXRecordDecl *D);
void MergeDefinitionData(CXXRecordDecl *D,
struct CXXRecordDecl::DefinitionData &&NewDD);
void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data);
void MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD);
void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data);
void MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD);
static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC,
unsigned Index);
static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC,
unsigned Index, NamedDecl *D);
/// Results from loading a RedeclarableDecl.
class RedeclarableResult {
Decl *MergeWith;
GlobalDeclID FirstID;
bool IsKeyDecl;
public:
RedeclarableResult(Decl *MergeWith, GlobalDeclID FirstID, bool IsKeyDecl)
: MergeWith(MergeWith), FirstID(FirstID), IsKeyDecl(IsKeyDecl) {}
/// \brief Retrieve the first ID.
GlobalDeclID getFirstID() const { return FirstID; }
/// \brief Is this declaration a key declaration?
bool isKeyDecl() const { return IsKeyDecl; }
/// \brief Get a known declaration that this should be merged with, if
/// any.
Decl *getKnownMergeTarget() const { return MergeWith; }
};
/// \brief Class used to capture the result of searching for an existing
/// declaration of a specific kind and name, along with the ability
/// to update the place where this result was found (the declaration
/// chain hanging off an identifier or the DeclContext we searched in)
/// if requested.
class FindExistingResult {
ASTReader &Reader;
NamedDecl *New;
NamedDecl *Existing;
bool AddResult;
unsigned AnonymousDeclNumber;
IdentifierInfo *TypedefNameForLinkage;
void operator=(FindExistingResult &&) = delete;
public:
FindExistingResult(ASTReader &Reader)
: Reader(Reader), New(nullptr), Existing(nullptr), AddResult(false),
AnonymousDeclNumber(0), TypedefNameForLinkage(nullptr) {}
FindExistingResult(ASTReader &Reader, NamedDecl *New, NamedDecl *Existing,
unsigned AnonymousDeclNumber,
IdentifierInfo *TypedefNameForLinkage)
: Reader(Reader), New(New), Existing(Existing), AddResult(true),
AnonymousDeclNumber(AnonymousDeclNumber),
TypedefNameForLinkage(TypedefNameForLinkage) {}
FindExistingResult(FindExistingResult &&Other)
: Reader(Other.Reader), New(Other.New), Existing(Other.Existing),
AddResult(Other.AddResult),
AnonymousDeclNumber(Other.AnonymousDeclNumber),
TypedefNameForLinkage(Other.TypedefNameForLinkage) {
Other.AddResult = false;
}
~FindExistingResult();
/// \brief Suppress the addition of this result into the known set of
/// names.
void suppress() { AddResult = false; }
operator NamedDecl*() const { return Existing; }
template<typename T>
operator T*() const { return dyn_cast_or_null<T>(Existing); }
};
static DeclContext *getPrimaryContextForMerging(ASTReader &Reader,
DeclContext *DC);
FindExistingResult findExisting(NamedDecl *D);
public:
ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record,
ASTReader::RecordLocation Loc,
DeclID thisDeclID, SourceLocation ThisDeclLoc)
: Reader(Reader), Record(Record), Loc(Loc),
ThisDeclID(thisDeclID), ThisDeclLoc(ThisDeclLoc),
TypeIDForTypeDecl(0), NamedDeclForTagDecl(0),
TypedefNameForLinkage(nullptr), HasPendingBody(false),
IsDeclMarkedUsed(false) {}
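// LazySpecializations is stored as a DeclID array whose element 0 holds the
// element count, followed by a sorted, uniqued list of specialization IDs;
// AddLazySpecializations below merges new IDs into that encoding.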
template <typename T> static
void AddLazySpecializations(T *D,
SmallVectorImpl<serialization::DeclID>& IDs) {
if (IDs.empty())
return;
// FIXME: We should avoid this pattern of getting the ASTContext.
ASTContext &C = D->getASTContext();
auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations;
if (auto &Old = LazySpecializations) {
IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]);
std::sort(IDs.begin(), IDs.end());
IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end());
}
auto *Result = new (C) serialization::DeclID[1 + IDs.size()];
*Result = IDs.size();
std::copy(IDs.begin(), IDs.end(), Result + 1);
LazySpecializations = Result;
}
template <typename DeclT>
static Decl *getMostRecentDeclImpl(Redeclarable<DeclT> *D);
static Decl *getMostRecentDeclImpl(...);
static Decl *getMostRecentDecl(Decl *D);
template <typename DeclT>
static void attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D, Decl *Previous,
Decl *Canon);
static void attachPreviousDeclImpl(ASTReader &Reader, ...);
static void attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous,
Decl *Canon);
template <typename DeclT>
static void attachLatestDeclImpl(Redeclarable<DeclT> *D, Decl *Latest);
static void attachLatestDeclImpl(...);
static void attachLatestDecl(Decl *D, Decl *latest);
template <typename DeclT>
static void markIncompleteDeclChainImpl(Redeclarable<DeclT> *D);
static void markIncompleteDeclChainImpl(...);
/// \brief Determine whether this declaration has a pending body.
bool hasPendingBody() const { return HasPendingBody; }
void ReadFunctionDefinition(FunctionDecl *FD);
void Visit(Decl *D);
void UpdateDecl(Decl *D, llvm::SmallVectorImpl<serialization::DeclID>&);
static void setNextObjCCategory(ObjCCategoryDecl *Cat,
ObjCCategoryDecl *Next) {
Cat->NextClassCategory = Next;
}
void VisitDecl(Decl *D);
void VisitPragmaCommentDecl(PragmaCommentDecl *D);
void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D);
void VisitTranslationUnitDecl(TranslationUnitDecl *TU);
void VisitNamedDecl(NamedDecl *ND);
void VisitLabelDecl(LabelDecl *LD);
void VisitNamespaceDecl(NamespaceDecl *D);
void VisitUsingDirectiveDecl(UsingDirectiveDecl *D);
void VisitNamespaceAliasDecl(NamespaceAliasDecl *D);
void VisitTypeDecl(TypeDecl *TD);
RedeclarableResult VisitTypedefNameDecl(TypedefNameDecl *TD);
void VisitTypedefDecl(TypedefDecl *TD);
void VisitTypeAliasDecl(TypeAliasDecl *TD);
void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D);
RedeclarableResult VisitTagDecl(TagDecl *TD);
void VisitEnumDecl(EnumDecl *ED);
RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD);
void VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); }
RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D);
void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); }
RedeclarableResult VisitClassTemplateSpecializationDeclImpl(
ClassTemplateSpecializationDecl *D);
void VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
VisitClassTemplateSpecializationDeclImpl(D);
}
void VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D);
void VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D);
RedeclarableResult
VisitVarTemplateSpecializationDeclImpl(VarTemplateSpecializationDecl *D);
void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) {
VisitVarTemplateSpecializationDeclImpl(D);
}
void VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D);
void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D);
void VisitValueDecl(ValueDecl *VD);
void VisitEnumConstantDecl(EnumConstantDecl *ECD);
void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D);
void VisitDeclaratorDecl(DeclaratorDecl *DD);
void VisitFunctionDecl(FunctionDecl *FD);
void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *GD);
void VisitCXXMethodDecl(CXXMethodDecl *D);
void VisitCXXConstructorDecl(CXXConstructorDecl *D);
void VisitCXXDestructorDecl(CXXDestructorDecl *D);
void VisitCXXConversionDecl(CXXConversionDecl *D);
void VisitFieldDecl(FieldDecl *FD);
void VisitMSPropertyDecl(MSPropertyDecl *FD);
void VisitIndirectFieldDecl(IndirectFieldDecl *FD);
RedeclarableResult VisitVarDeclImpl(VarDecl *D);
void VisitVarDecl(VarDecl *VD) { VisitVarDeclImpl(VD); }
void VisitImplicitParamDecl(ImplicitParamDecl *PD);
void VisitParmVarDecl(ParmVarDecl *PD);
void VisitDecompositionDecl(DecompositionDecl *DD);
void VisitBindingDecl(BindingDecl *BD);
void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D);
DeclID VisitTemplateDecl(TemplateDecl *D);
RedeclarableResult VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D);
void VisitClassTemplateDecl(ClassTemplateDecl *D);
void VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D);
void VisitVarTemplateDecl(VarTemplateDecl *D);
void VisitFunctionTemplateDecl(FunctionTemplateDecl *D);
void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D);
void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
void VisitUsingDecl(UsingDecl *D);
void VisitUsingPackDecl(UsingPackDecl *D);
void VisitUsingShadowDecl(UsingShadowDecl *D);
void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D);
void VisitLinkageSpecDecl(LinkageSpecDecl *D);
void VisitExportDecl(ExportDecl *D);
void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD);
void VisitImportDecl(ImportDecl *D);
void VisitAccessSpecDecl(AccessSpecDecl *D);
void VisitFriendDecl(FriendDecl *D);
void VisitFriendTemplateDecl(FriendTemplateDecl *D);
void VisitStaticAssertDecl(StaticAssertDecl *D);
void VisitBlockDecl(BlockDecl *BD);
void VisitCapturedDecl(CapturedDecl *CD);
void VisitEmptyDecl(EmptyDecl *D);
std::pair<uint64_t, uint64_t> VisitDeclContext(DeclContext *DC);
template<typename T>
RedeclarableResult VisitRedeclarable(Redeclarable<T> *D);
template<typename T>
void mergeRedeclarable(Redeclarable<T> *D, RedeclarableResult &Redecl,
DeclID TemplatePatternID = 0);
template<typename T>
void mergeRedeclarable(Redeclarable<T> *D, T *Existing,
RedeclarableResult &Redecl,
DeclID TemplatePatternID = 0);
template<typename T>
void mergeMergeable(Mergeable<T> *D);
void mergeTemplatePattern(RedeclarableTemplateDecl *D,
RedeclarableTemplateDecl *Existing,
DeclID DsID, bool IsKeyDecl);
ObjCTypeParamList *ReadObjCTypeParamList();
// FIXME: Reorder according to DeclNodes.td?
void VisitObjCMethodDecl(ObjCMethodDecl *D);
void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D);
void VisitObjCContainerDecl(ObjCContainerDecl *D);
void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D);
void VisitObjCIvarDecl(ObjCIvarDecl *D);
void VisitObjCProtocolDecl(ObjCProtocolDecl *D);
void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D);
void VisitObjCCategoryDecl(ObjCCategoryDecl *D);
void VisitObjCImplDecl(ObjCImplDecl *D);
void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D);
void VisitObjCImplementationDecl(ObjCImplementationDecl *D);
void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D);
void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D);
void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
};
} // end namespace clang
namespace {
/// Iterator over the redeclarations of a declaration that have already
/// been merged into the same redeclaration chain.
template<typename DeclT>
class MergedRedeclIterator {
DeclT *Start, *Canonical, *Current;
public:
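// A default-constructed iterator is the end sentinel; operator!= below
// compares only Current, so Start and Canonical are left unset here.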
MergedRedeclIterator() : Current(nullptr) {}
MergedRedeclIterator(DeclT *Start)
: Start(Start), Canonical(nullptr), Current(Start) {}
DeclT *operator*() { return Current; }
MergedRedeclIterator &operator++() {
if (Current->isFirstDecl()) {
Canonical = Current;
Current = Current->getMostRecentDecl();
} else
Current = Current->getPreviousDecl();
// If we started in the merged portion, we'll reach our start position
// eventually. Otherwise, we'll never reach it, but the second declaration
// we reached was the canonical declaration, so stop when we see that one
// again.
if (Current == Start || Current == Canonical)
Current = nullptr;
return *this;
}
friend bool operator!=(const MergedRedeclIterator &A,
const MergedRedeclIterator &B) {
return A.Current != B.Current;
}
};
} // end anonymous namespace
template <typename DeclT>
static llvm::iterator_range<MergedRedeclIterator<DeclT>>
merged_redecls(DeclT *D) {
return llvm::make_range(MergedRedeclIterator<DeclT>(D),
MergedRedeclIterator<DeclT>());
}
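// Example use (a sketch mirroring VisitEnumDecl below), scanning the
// already-merged chain of a hypothetical EnumDecl *ED for a local
// definition:
//
//   for (auto *D : merged_redecls(ED->getCanonicalDecl()))
//     if (!D->isFromASTFile() && D->isCompleteDefinition())
//       /* found the local definition */;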
uint64_t ASTDeclReader::GetCurrentCursorOffset() {
return Loc.F->DeclsCursor.GetCurrentBitNo() + Loc.F->GlobalBitOffset;
}
void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) {
if (Record.readInt())
Reader.BodySource[FD] = Loc.F->Kind == ModuleKind::MK_MainFile;
if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
CD->NumCtorInitializers = Record.readInt();
if (CD->NumCtorInitializers)
CD->CtorInitializers = ReadGlobalOffset();
}
// Store the offset of the body so we can lazily load it later.
Reader.PendingBodies[FD] = GetCurrentCursorOffset();
HasPendingBody = true;
}
void ASTDeclReader::Visit(Decl *D) {
DeclVisitor<ASTDeclReader, void>::Visit(D);
// At this point we have deserialized and merged the decl and it is safe to
// update its canonical decl to signal that the entire entity is used.
D->getCanonicalDecl()->Used |= IsDeclMarkedUsed;
IsDeclMarkedUsed = false;
if (DeclaratorDecl *DD = dyn_cast<DeclaratorDecl>(D)) {
if (DD->DeclInfo) {
DeclaratorDecl::ExtInfo *Info =
DD->DeclInfo.get<DeclaratorDecl::ExtInfo *>();
Info->TInfo = GetTypeSourceInfo();
} else {
DD->DeclInfo = GetTypeSourceInfo();
}
}
if (TypeDecl *TD = dyn_cast<TypeDecl>(D)) {
// We have a fully initialized TypeDecl. Read its type now.
TD->setTypeForDecl(Reader.GetType(TypeIDForTypeDecl).getTypePtrOrNull());
// If this is a tag declaration with a typedef name for linkage, it's safe
// to load that typedef now.
if (NamedDeclForTagDecl)
cast<TagDecl>(D)->TypedefNameDeclOrQualifier =
cast<TypedefNameDecl>(Reader.GetDecl(NamedDeclForTagDecl));
} else if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D)) {
// If we have a fully initialized TypeDecl, we can safely read its type now.
ID->TypeForDecl = Reader.GetType(TypeIDForTypeDecl).getTypePtrOrNull();
} else if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
// FunctionDecl's body was written last after all other Stmts/Exprs.
// We only read it if FD doesn't already have a body (e.g., from another
// module).
// FIXME: Can we diagnose ODR violations somehow?
if (Record.readInt())
ReadFunctionDefinition(FD);
}
}
void ASTDeclReader::VisitDecl(Decl *D) {
if (D->isTemplateParameter() || D->isTemplateParameterPack() ||
isa<ParmVarDecl>(D)) {
// We don't want to deserialize the DeclContext of a template
// parameter or of a parameter of a function template immediately. These
// entities might be used in the formulation of its DeclContext (for
// example, a function parameter can be used in decltype() in trailing
// return type of the function). Use the translation unit DeclContext as a
// placeholder.
GlobalDeclID SemaDCIDForTemplateParmDecl = ReadDeclID();
GlobalDeclID LexicalDCIDForTemplateParmDecl = ReadDeclID();
if (!LexicalDCIDForTemplateParmDecl)
LexicalDCIDForTemplateParmDecl = SemaDCIDForTemplateParmDecl;
Reader.addPendingDeclContextInfo(D,
SemaDCIDForTemplateParmDecl,
LexicalDCIDForTemplateParmDecl);
D->setDeclContext(Reader.getContext().getTranslationUnitDecl());
} else {
DeclContext *SemaDC = ReadDeclAs<DeclContext>();
DeclContext *LexicalDC = ReadDeclAs<DeclContext>();
if (!LexicalDC)
LexicalDC = SemaDC;
DeclContext *MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC);
// Avoid calling setLexicalDeclContext() directly because it uses
// Decl::getASTContext() internally, which is unsafe during deserialization.
D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC,
Reader.getContext());
}
D->setLocation(ThisDeclLoc);
D->setInvalidDecl(Record.readInt());
if (Record.readInt()) { // hasAttrs
AttrVec Attrs;
Record.readAttributes(Attrs);
// Avoid calling setAttrs() directly because it uses Decl::getASTContext()
// internally, which is unsafe during deserialization.
D->setAttrsImpl(Attrs, Reader.getContext());
}
D->setImplicit(Record.readInt());
D->Used = Record.readInt();
IsDeclMarkedUsed |= D->Used;
D->setReferenced(Record.readInt());
D->setTopLevelDeclInObjCContainer(Record.readInt());
D->setAccess((AccessSpecifier)Record.readInt());
D->FromASTFile = true;
bool ModulePrivate = Record.readInt();
// Determine whether this declaration is part of a (sub)module. If so, it
// may not yet be visible.
if (unsigned SubmoduleID = readSubmoduleID()) {
// Store the owning submodule ID in the declaration.
D->setModuleOwnershipKind(
ModulePrivate ? Decl::ModuleOwnershipKind::ModulePrivate
: Decl::ModuleOwnershipKind::VisibleWhenImported);
D->setOwningModuleID(SubmoduleID);
if (ModulePrivate) {
// Module-private declarations are never visible, so there is no work to
// do.
} else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
// If local visibility is being tracked, this declaration will become
// hidden and visible as the owning module does.
} else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) {
// Mark the declaration as visible when its owning module becomes visible.
if (Owner->NameVisibility == Module::AllVisible)
D->setVisibleDespiteOwningModule();
else
Reader.HiddenNamesMap[Owner].push_back(D);
}
} else if (ModulePrivate) {
D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate);
}
}
void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
VisitDecl(D);
D->setLocation(ReadSourceLocation());
D->CommentKind = (PragmaMSCommentKind)Record.readInt();
std::string Arg = ReadString();
memcpy(D->getTrailingObjects<char>(), Arg.data(), Arg.size());
D->getTrailingObjects<char>()[Arg.size()] = '\0';
}
void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) {
VisitDecl(D);
D->setLocation(ReadSourceLocation());
std::string Name = ReadString();
memcpy(D->getTrailingObjects<char>(), Name.data(), Name.size());
D->getTrailingObjects<char>()[Name.size()] = '\0';
D->ValueStart = Name.size() + 1;
std::string Value = ReadString();
memcpy(D->getTrailingObjects<char>() + D->ValueStart, Value.data(),
Value.size());
D->getTrailingObjects<char>()[D->ValueStart + Value.size()] = '\0';
}
void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) {
llvm_unreachable("Translation units are not serialized");
}
void ASTDeclReader::VisitNamedDecl(NamedDecl *ND) {
VisitDecl(ND);
ND->setDeclName(Record.readDeclarationName());
AnonymousDeclNumber = Record.readInt();
}
void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) {
VisitNamedDecl(TD);
TD->setLocStart(ReadSourceLocation());
// Delay type reading until after we have fully initialized the decl.
TypeIDForTypeDecl = Record.getGlobalTypeID(Record.readInt());
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
TypeSourceInfo *TInfo = GetTypeSourceInfo();
if (Record.readInt()) { // isModed
QualType modedT = Record.readType();
TD->setModedTypeSourceInfo(TInfo, modedT);
} else
TD->setTypeSourceInfo(TInfo);
// Read and discard the declaration for which this is a typedef name for
// linkage, if it exists. We cannot rely on our type to pull in this decl,
// because it might have been merged with a type from another module and
// thus might not refer to our version of the declaration.
ReadDecl();
return Redecl;
}
void ASTDeclReader::VisitTypedefDecl(TypedefDecl *TD) {
RedeclarableResult Redecl = VisitTypedefNameDecl(TD);
mergeRedeclarable(TD, Redecl);
}
void ASTDeclReader::VisitTypeAliasDecl(TypeAliasDecl *TD) {
RedeclarableResult Redecl = VisitTypedefNameDecl(TD);
if (auto *Template = ReadDeclAs<TypeAliasTemplateDecl>())
// Merged when we merge the template.
TD->setDescribedAliasTemplate(Template);
else
mergeRedeclarable(TD, Redecl);
}
ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
TD->IdentifierNamespace = Record.readInt();
TD->setTagKind((TagDecl::TagKind)Record.readInt());
if (!isa<CXXRecordDecl>(TD))
TD->setCompleteDefinition(Record.readInt());
TD->setEmbeddedInDeclarator(Record.readInt());
TD->setFreeStanding(Record.readInt());
TD->setCompleteDefinitionRequired(Record.readInt());
TD->setBraceRange(ReadSourceRange());
switch (Record.readInt()) {
case 0:
break;
case 1: { // ExtInfo
TagDecl::ExtInfo *Info = new (Reader.getContext()) TagDecl::ExtInfo();
ReadQualifierInfo(*Info);
TD->TypedefNameDeclOrQualifier = Info;
break;
}
case 2: // TypedefNameForAnonDecl
NamedDeclForTagDecl = ReadDeclID();
TypedefNameForLinkage = Record.getIdentifierInfo();
break;
default:
llvm_unreachable("unexpected tag info kind");
}
if (!isa<CXXRecordDecl>(TD))
mergeRedeclarable(TD, Redecl);
return Redecl;
}
void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
VisitTagDecl(ED);
if (TypeSourceInfo *TI = GetTypeSourceInfo())
ED->setIntegerTypeSourceInfo(TI);
else
ED->setIntegerType(Record.readType());
ED->setPromotionType(Record.readType());
ED->setNumPositiveBits(Record.readInt());
ED->setNumNegativeBits(Record.readInt());
ED->IsScoped = Record.readInt();
ED->IsScopedUsingClassTag = Record.readInt();
ED->IsFixed = Record.readInt();
// If this is a definition subject to the ODR, and we already have a
// definition, merge this one into it.
if (ED->IsCompleteDefinition &&
Reader.getContext().getLangOpts().Modules &&
Reader.getContext().getLangOpts().CPlusPlus) {
EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()];
if (!OldDef) {
// This is the first time we've seen an imported definition. Look for a
// local definition before deciding that we are the first definition.
for (auto *D : merged_redecls(ED->getCanonicalDecl())) {
if (!D->isFromASTFile() && D->isCompleteDefinition()) {
OldDef = D;
break;
}
}
}
if (OldDef) {
Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef));
ED->IsCompleteDefinition = false;
Reader.mergeDefinitionVisibility(OldDef, ED);
} else {
OldDef = ED;
}
}
if (EnumDecl *InstED = ReadDeclAs<EnumDecl>()) {
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
ED->setInstantiationOfMemberEnum(Reader.getContext(), InstED, TSK);
ED->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) {
RedeclarableResult Redecl = VisitTagDecl(RD);
RD->setHasFlexibleArrayMember(Record.readInt());
RD->setAnonymousStructOrUnion(Record.readInt());
RD->setHasObjectMember(Record.readInt());
RD->setHasVolatileMember(Record.readInt());
return Redecl;
}
void ASTDeclReader::VisitValueDecl(ValueDecl *VD) {
VisitNamedDecl(VD);
VD->setType(Record.readType());
}
void ASTDeclReader::VisitEnumConstantDecl(EnumConstantDecl *ECD) {
VisitValueDecl(ECD);
if (Record.readInt())
ECD->setInitExpr(Record.readExpr());
ECD->setInitVal(Record.readAPSInt());
mergeMergeable(ECD);
}
void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) {
VisitValueDecl(DD);
DD->setInnerLocStart(ReadSourceLocation());
if (Record.readInt()) { // hasExtInfo
DeclaratorDecl::ExtInfo *Info
= new (Reader.getContext()) DeclaratorDecl::ExtInfo();
ReadQualifierInfo(*Info);
DD->DeclInfo = Info;
}
}
void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
RedeclarableResult Redecl = VisitRedeclarable(FD);
VisitDeclaratorDecl(FD);
ReadDeclarationNameLoc(FD->DNLoc, FD->getDeclName());
FD->IdentifierNamespace = Record.readInt();
// FunctionDecl's body is handled last at ASTDeclReader::Visit,
// after everything else is read.
FD->SClass = (StorageClass)Record.readInt();
FD->IsInline = Record.readInt();
FD->IsInlineSpecified = Record.readInt();
FD->IsExplicitSpecified = Record.readInt();
FD->IsVirtualAsWritten = Record.readInt();
FD->IsPure = Record.readInt();
FD->HasInheritedPrototype = Record.readInt();
FD->HasWrittenPrototype = Record.readInt();
FD->IsDeleted = Record.readInt();
FD->IsTrivial = Record.readInt();
FD->IsDefaulted = Record.readInt();
FD->IsExplicitlyDefaulted = Record.readInt();
FD->HasImplicitReturnZero = Record.readInt();
FD->IsConstexpr = Record.readInt();
FD->UsesSEHTry = Record.readInt();
FD->HasSkippedBody = Record.readInt();
FD->IsLateTemplateParsed = Record.readInt();
FD->setCachedLinkage(Linkage(Record.readInt()));
FD->EndRangeLoc = ReadSourceLocation();
switch ((FunctionDecl::TemplatedKind)Record.readInt()) {
case FunctionDecl::TK_NonTemplate:
mergeRedeclarable(FD, Redecl);
break;
case FunctionDecl::TK_FunctionTemplate:
// Merged when we merge the template.
FD->setDescribedFunctionTemplate(ReadDeclAs<FunctionTemplateDecl>());
break;
case FunctionDecl::TK_MemberSpecialization: {
FunctionDecl *InstFD = ReadDeclAs<FunctionDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
FD->setInstantiationOfMemberFunction(Reader.getContext(), InstFD, TSK);
FD->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
mergeRedeclarable(FD, Redecl);
break;
}
case FunctionDecl::TK_FunctionTemplateSpecialization: {
FunctionTemplateDecl *Template = ReadDeclAs<FunctionTemplateDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
// Template arguments.
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
// Template args as written.
SmallVector<TemplateArgumentLoc, 8> TemplArgLocs;
SourceLocation LAngleLoc, RAngleLoc;
bool HasTemplateArgumentsAsWritten = Record.readInt();
if (HasTemplateArgumentsAsWritten) {
unsigned NumTemplateArgLocs = Record.readInt();
TemplArgLocs.reserve(NumTemplateArgLocs);
for (unsigned i=0; i != NumTemplateArgLocs; ++i)
TemplArgLocs.push_back(Record.readTemplateArgumentLoc());
LAngleLoc = ReadSourceLocation();
RAngleLoc = ReadSourceLocation();
}
SourceLocation POI = ReadSourceLocation();
ASTContext &C = Reader.getContext();
TemplateArgumentList *TemplArgList
= TemplateArgumentList::CreateCopy(C, TemplArgs);
TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc);
for (unsigned i=0, e = TemplArgLocs.size(); i != e; ++i)
TemplArgsInfo.addArgument(TemplArgLocs[i]);
FunctionTemplateSpecializationInfo *FTInfo
= FunctionTemplateSpecializationInfo::Create(C, FD, Template, TSK,
TemplArgList,
HasTemplateArgumentsAsWritten ? &TemplArgsInfo
: nullptr,
POI);
FD->TemplateOrSpecialization = FTInfo;
if (FD->isCanonicalDecl()) { // If canonical, add to the template's set.
// The template that contains the specializations set. It's not safe to
// use getCanonicalDecl on Template since it may still be initializing.
FunctionTemplateDecl *CanonTemplate = ReadDeclAs<FunctionTemplateDecl>();
// Get the InsertPos by FindNodeOrInsertPos() instead of calling
// InsertNode(FTInfo) directly to avoid the getASTContext() call in
// FunctionTemplateSpecializationInfo's Profile().
// We avoid getASTContext because a decl in the parent hierarchy may
// be initializing.
llvm::FoldingSetNodeID ID;
FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C);
void *InsertPos = nullptr;
FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr();
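// FindNodeOrInsertPos returns the existing node when one is already in
// the folding set; otherwise it returns null and fills InsertPos so the
// new node can be inserted without re-profiling.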
FunctionTemplateSpecializationInfo *ExistingInfo =
CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos);
if (InsertPos)
CommonPtr->Specializations.InsertNode(FTInfo, InsertPos);
else {
assert(Reader.getContext().getLangOpts().Modules &&
"already deserialized this template specialization");
mergeRedeclarable(FD, ExistingInfo->Function, Redecl);
}
}
break;
}
case FunctionDecl::TK_DependentFunctionTemplateSpecialization: {
// Templates.
UnresolvedSet<8> TemplDecls;
unsigned NumTemplates = Record.readInt();
while (NumTemplates--)
TemplDecls.addDecl(ReadDeclAs<NamedDecl>());
// Templates args.
TemplateArgumentListInfo TemplArgs;
unsigned NumArgs = Record.readInt();
while (NumArgs--)
TemplArgs.addArgument(Record.readTemplateArgumentLoc());
TemplArgs.setLAngleLoc(ReadSourceLocation());
TemplArgs.setRAngleLoc(ReadSourceLocation());
FD->setDependentTemplateSpecialization(Reader.getContext(),
TemplDecls, TemplArgs);
// These are not merged; we don't need to merge redeclarations of dependent
// template friends.
break;
}
}
// Read in the parameters.
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(ReadDeclAs<ParmVarDecl>());
FD->setParams(Reader.getContext(), Params);
}
void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
VisitNamedDecl(MD);
if (Record.readInt()) {
// Load the body on-demand. Most clients won't care, because method
// definitions rarely show up in headers.
Reader.PendingBodies[MD] = GetCurrentCursorOffset();
HasPendingBody = true;
MD->setSelfDecl(ReadDeclAs<ImplicitParamDecl>());
MD->setCmdDecl(ReadDeclAs<ImplicitParamDecl>());
}
MD->setInstanceMethod(Record.readInt());
MD->setVariadic(Record.readInt());
MD->setPropertyAccessor(Record.readInt());
MD->setDefined(Record.readInt());
MD->IsOverriding = Record.readInt();
MD->HasSkippedBody = Record.readInt();
MD->IsRedeclaration = Record.readInt();
MD->HasRedeclaration = Record.readInt();
if (MD->HasRedeclaration)
Reader.getContext().setObjCMethodRedeclaration(MD,
ReadDeclAs<ObjCMethodDecl>());
MD->setDeclImplementation((ObjCMethodDecl::ImplementationControl)Record.readInt());
MD->setObjCDeclQualifier((Decl::ObjCDeclQualifier)Record.readInt());
MD->SetRelatedResultType(Record.readInt());
MD->setReturnType(Record.readType());
MD->setReturnTypeSourceInfo(GetTypeSourceInfo());
MD->DeclEndLoc = ReadSourceLocation();
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(ReadDeclAs<ParmVarDecl>());
MD->SelLocsKind = Record.readInt();
unsigned NumStoredSelLocs = Record.readInt();
SmallVector<SourceLocation, 16> SelLocs;
SelLocs.reserve(NumStoredSelLocs);
for (unsigned i = 0; i != NumStoredSelLocs; ++i)
SelLocs.push_back(ReadSourceLocation());
MD->setParamsAndSelLocs(Reader.getContext(), Params, SelLocs);
}
void ASTDeclReader::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) {
VisitTypedefNameDecl(D);
D->Variance = Record.readInt();
D->Index = Record.readInt();
D->VarianceLoc = ReadSourceLocation();
D->ColonLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitObjCContainerDecl(ObjCContainerDecl *CD) {
VisitNamedDecl(CD);
CD->setAtStartLoc(ReadSourceLocation());
CD->setAtEndRange(ReadSourceRange());
}
ObjCTypeParamList *ASTDeclReader::ReadObjCTypeParamList() {
unsigned numParams = Record.readInt();
if (numParams == 0)
return nullptr;
SmallVector<ObjCTypeParamDecl *, 4> typeParams;
typeParams.reserve(numParams);
for (unsigned i = 0; i != numParams; ++i) {
auto typeParam = ReadDeclAs<ObjCTypeParamDecl>();
if (!typeParam)
return nullptr;
typeParams.push_back(typeParam);
}
SourceLocation lAngleLoc = ReadSourceLocation();
SourceLocation rAngleLoc = ReadSourceLocation();
return ObjCTypeParamList::create(Reader.getContext(), lAngleLoc,
typeParams, rAngleLoc);
}
void ASTDeclReader::ReadObjCDefinitionData(
struct ObjCInterfaceDecl::DefinitionData &Data) {
// Read the superclass.
Data.SuperClassTInfo = GetTypeSourceInfo();
Data.EndLoc = ReadSourceLocation();
Data.HasDesignatedInitializers = Record.readInt();
// Read the directly referenced protocols and their SourceLocations.
unsigned NumProtocols = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> Protocols;
Protocols.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
Protocols.push_back(ReadDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
ProtoLocs.push_back(ReadSourceLocation());
Data.ReferencedProtocols.set(Protocols.data(), NumProtocols, ProtoLocs.data(),
Reader.getContext());
// Read the transitive closure of protocols referenced by this class.
NumProtocols = Record.readInt();
Protocols.clear();
Protocols.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
Protocols.push_back(ReadDeclAs<ObjCProtocolDecl>());
Data.AllReferencedProtocols.set(Protocols.data(), NumProtocols,
Reader.getContext());
}
void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD) {
// FIXME: odr checking?
}
void ASTDeclReader::VisitObjCInterfaceDecl(ObjCInterfaceDecl *ID) {
RedeclarableResult Redecl = VisitRedeclarable(ID);
VisitObjCContainerDecl(ID);
TypeIDForTypeDecl = Record.getGlobalTypeID(Record.readInt());
mergeRedeclarable(ID, Redecl);
ID->TypeParamList = ReadObjCTypeParamList();
if (Record.readInt()) {
// Read the definition.
ID->allocateDefinitionData();
ReadObjCDefinitionData(ID->data());
ObjCInterfaceDecl *Canon = ID->getCanonicalDecl();
if (Canon->Data.getPointer()) {
// If we already have a definition, keep the definition invariant and
// merge the data.
MergeDefinitionData(Canon, std::move(ID->data()));
ID->Data = Canon->Data;
} else {
// Set the definition data of the canonical declaration, so other
// redeclarations will see it.
ID->getCanonicalDecl()->Data = ID->Data;
// We will rebuild this list lazily.
ID->setIvarList(nullptr);
}
// Note that we have deserialized a definition.
Reader.PendingDefinitions.insert(ID);
// Note that we've loaded this Objective-C class.
Reader.ObjCClassesLoaded.push_back(ID);
} else {
ID->Data = ID->getCanonicalDecl()->Data;
}
}
void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) {
VisitFieldDecl(IVD);
IVD->setAccessControl((ObjCIvarDecl::AccessControl)Record.readInt());
// This field will be built lazily.
IVD->setNextIvar(nullptr);
bool synth = Record.readInt();
IVD->setSynthesize(synth);
}
void ASTDeclReader::ReadObjCDefinitionData(
struct ObjCProtocolDecl::DefinitionData &Data) {
unsigned NumProtoRefs = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
ProtoRefs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoRefs.push_back(ReadDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoLocs.push_back(ReadSourceLocation());
Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs,
ProtoLocs.data(), Reader.getContext());
}
void ASTDeclReader::MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD) {
// FIXME: odr checking?
}
void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) {
RedeclarableResult Redecl = VisitRedeclarable(PD);
VisitObjCContainerDecl(PD);
mergeRedeclarable(PD, Redecl);
if (Record.readInt()) {
// Read the definition.
PD->allocateDefinitionData();
ReadObjCDefinitionData(PD->data());
ObjCProtocolDecl *Canon = PD->getCanonicalDecl();
if (Canon->Data.getPointer()) {
// If we already have a definition, keep the definition invariant and
// merge the data.
MergeDefinitionData(Canon, std::move(PD->data()));
PD->Data = Canon->Data;
} else {
// Set the definition data of the canonical declaration, so other
// redeclarations will see it.
PD->getCanonicalDecl()->Data = PD->Data;
}
// Note that we have deserialized a definition.
Reader.PendingDefinitions.insert(PD);
} else {
PD->Data = PD->getCanonicalDecl()->Data;
}
}
void ASTDeclReader::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *FD) {
VisitFieldDecl(FD);
}
void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) {
VisitObjCContainerDecl(CD);
CD->setCategoryNameLoc(ReadSourceLocation());
CD->setIvarLBraceLoc(ReadSourceLocation());
CD->setIvarRBraceLoc(ReadSourceLocation());
// Note that this category has been deserialized. We do this before
// deserializing the interface declaration, so that it will consider this
// category.
Reader.CategoriesDeserialized.insert(CD);
CD->ClassInterface = ReadDeclAs<ObjCInterfaceDecl>();
CD->TypeParamList = ReadObjCTypeParamList();
unsigned NumProtoRefs = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
ProtoRefs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoRefs.push_back(ReadDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoLocs.push_back(ReadSourceLocation());
CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(),
Reader.getContext());
}
void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) {
VisitNamedDecl(CAD);
CAD->setClassInterface(ReadDeclAs<ObjCInterfaceDecl>());
}
void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
VisitNamedDecl(D);
D->setAtLoc(ReadSourceLocation());
D->setLParenLoc(ReadSourceLocation());
QualType T = Record.readType();
TypeSourceInfo *TSI = GetTypeSourceInfo();
D->setType(T, TSI);
D->setPropertyAttributes(
(ObjCPropertyDecl::PropertyAttributeKind)Record.readInt());
D->setPropertyAttributesAsWritten(
(ObjCPropertyDecl::PropertyAttributeKind)Record.readInt());
D->setPropertyImplementation(
(ObjCPropertyDecl::PropertyControl)Record.readInt());
DeclarationName GetterName = Record.readDeclarationName();
SourceLocation GetterLoc = ReadSourceLocation();
D->setGetterName(GetterName.getObjCSelector(), GetterLoc);
DeclarationName SetterName = Record.readDeclarationName();
SourceLocation SetterLoc = ReadSourceLocation();
D->setSetterName(SetterName.getObjCSelector(), SetterLoc);
D->setGetterMethodDecl(ReadDeclAs<ObjCMethodDecl>());
D->setSetterMethodDecl(ReadDeclAs<ObjCMethodDecl>());
D->setPropertyIvarDecl(ReadDeclAs<ObjCIvarDecl>());
}
void ASTDeclReader::VisitObjCImplDecl(ObjCImplDecl *D) {
VisitObjCContainerDecl(D);
D->setClassInterface(ReadDeclAs<ObjCInterfaceDecl>());
}
void ASTDeclReader::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) {
VisitObjCImplDecl(D);
D->CategoryNameLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
VisitObjCImplDecl(D);
D->setSuperClass(ReadDeclAs<ObjCInterfaceDecl>());
D->SuperLoc = ReadSourceLocation();
D->setIvarLBraceLoc(ReadSourceLocation());
D->setIvarRBraceLoc(ReadSourceLocation());
D->setHasNonZeroConstructors(Record.readInt());
D->setHasDestructors(Record.readInt());
D->NumIvarInitializers = Record.readInt();
if (D->NumIvarInitializers)
D->IvarInitializers = ReadGlobalOffset();
}
void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
VisitDecl(D);
D->setAtLoc(ReadSourceLocation());
D->setPropertyDecl(ReadDeclAs<ObjCPropertyDecl>());
D->PropertyIvarDecl = ReadDeclAs<ObjCIvarDecl>();
D->IvarLoc = ReadSourceLocation();
D->setGetterCXXConstructor(Record.readExpr());
D->setSetterCXXAssignment(Record.readExpr());
}
void ASTDeclReader::VisitFieldDecl(FieldDecl *FD) {
VisitDeclaratorDecl(FD);
FD->Mutable = Record.readInt();
if (int BitWidthOrInitializer = Record.readInt()) {
FD->InitStorage.setInt(
static_cast<FieldDecl::InitStorageKind>(BitWidthOrInitializer - 1));
if (FD->InitStorage.getInt() == FieldDecl::ISK_CapturedVLAType) {
// Read captured variable length array.
FD->InitStorage.setPointer(Record.readType().getAsOpaquePtr());
} else {
FD->InitStorage.setPointer(Record.readExpr());
}
}
if (!FD->getDeclName()) {
if (FieldDecl *Tmpl = ReadDeclAs<FieldDecl>())
Reader.getContext().setInstantiatedFromUnnamedFieldDecl(FD, Tmpl);
}
mergeMergeable(FD);
}
void ASTDeclReader::VisitMSPropertyDecl(MSPropertyDecl *PD) {
VisitDeclaratorDecl(PD);
PD->GetterId = Record.getIdentifierInfo();
PD->SetterId = Record.getIdentifierInfo();
}
void ASTDeclReader::VisitIndirectFieldDecl(IndirectFieldDecl *FD) {
VisitValueDecl(FD);
FD->ChainingSize = Record.readInt();
assert(FD->ChainingSize >= 2 && "Anonymous chaining must be >= 2");
FD->Chaining = new (Reader.getContext()) NamedDecl*[FD->ChainingSize];
for (unsigned I = 0; I != FD->ChainingSize; ++I)
FD->Chaining[I] = ReadDeclAs<NamedDecl>();
mergeMergeable(FD);
}
ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) {
RedeclarableResult Redecl = VisitRedeclarable(VD);
VisitDeclaratorDecl(VD);
VD->VarDeclBits.SClass = (StorageClass)Record.readInt();
VD->VarDeclBits.TSCSpec = Record.readInt();
VD->VarDeclBits.InitStyle = Record.readInt();
if (!isa<ParmVarDecl>(VD)) {
VD->NonParmVarDeclBits.IsThisDeclarationADemotedDefinition =
Record.readInt();
VD->NonParmVarDeclBits.ExceptionVar = Record.readInt();
VD->NonParmVarDeclBits.NRVOVariable = Record.readInt();
VD->NonParmVarDeclBits.CXXForRangeDecl = Record.readInt();
VD->NonParmVarDeclBits.ARCPseudoStrong = Record.readInt();
VD->NonParmVarDeclBits.IsInline = Record.readInt();
VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt();
VD->NonParmVarDeclBits.IsConstexpr = Record.readInt();
VD->NonParmVarDeclBits.IsInitCapture = Record.readInt();
VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record.readInt();
VD->NonParmVarDeclBits.ImplicitParamKind = Record.readInt();
}
Linkage VarLinkage = Linkage(Record.readInt());
VD->setCachedLinkage(VarLinkage);
// Reconstruct the one piece of the IdentifierNamespace that we need.
if (VD->getStorageClass() == SC_Extern && VarLinkage != NoLinkage &&
VD->getLexicalDeclContext()->isFunctionOrMethod())
VD->setLocalExternDecl();
if (uint64_t Val = Record.readInt()) {
VD->setInit(Record.readExpr());
if (Val > 1) { // 1 = has init (ICE unknown), 2 = init not ICE, 3 = init is ICE
EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
Eval->CheckedICE = true;
Eval->IsICE = Val == 3;
}
}
enum VarKind {
VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization
};
switch ((VarKind)Record.readInt()) {
case VarNotTemplate:
// Only true variables (not parameters or implicit parameters) can be
// merged; the other kinds are not really redeclarable at all.
if (!isa<ParmVarDecl>(VD) && !isa<ImplicitParamDecl>(VD) &&
!isa<VarTemplateSpecializationDecl>(VD))
mergeRedeclarable(VD, Redecl);
break;
case VarTemplate:
// Merged when we merge the template.
VD->setDescribedVarTemplate(ReadDeclAs<VarTemplateDecl>());
break;
case StaticDataMemberSpecialization: { // HasMemberSpecializationInfo.
VarDecl *Tmpl = ReadDeclAs<VarDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
Reader.getContext().setInstantiatedFromStaticDataMember(VD, Tmpl, TSK, POI);
mergeRedeclarable(VD, Redecl);
break;
}
}
return Redecl;
}
void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) {
VisitVarDecl(PD);
}
void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) {
VisitVarDecl(PD);
unsigned isObjCMethodParam = Record.readInt();
unsigned scopeDepth = Record.readInt();
unsigned scopeIndex = Record.readInt();
unsigned declQualifier = Record.readInt();
if (isObjCMethodParam) {
assert(scopeDepth == 0);
PD->setObjCMethodScopeInfo(scopeIndex);
PD->ParmVarDeclBits.ScopeDepthOrObjCQuals = declQualifier;
} else {
PD->setScopeInfo(scopeDepth, scopeIndex);
}
PD->ParmVarDeclBits.IsKNRPromoted = Record.readInt();
PD->ParmVarDeclBits.HasInheritedDefaultArg = Record.readInt();
if (Record.readInt()) // hasUninstantiatedDefaultArg.
PD->setUninstantiatedDefaultArg(Record.readExpr());
// FIXME: If this is a redeclaration of a function from another module, handle
// inheritance of default arguments.
}
void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) {
VisitVarDecl(DD);
BindingDecl **BDs = DD->getTrailingObjects<BindingDecl*>();
for (unsigned I = 0; I != DD->NumBindings; ++I)
BDs[I] = ReadDeclAs<BindingDecl>();
}
void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) {
VisitValueDecl(BD);
BD->Binding = Record.readExpr();
}
void ASTDeclReader::VisitFileScopeAsmDecl(FileScopeAsmDecl *AD) {
VisitDecl(AD);
AD->setAsmString(cast<StringLiteral>(Record.readExpr()));
AD->setRParenLoc(ReadSourceLocation());
}
void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) {
VisitDecl(BD);
BD->setBody(cast_or_null<CompoundStmt>(Record.readStmt()));
BD->setSignatureAsWritten(GetTypeSourceInfo());
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(ReadDeclAs<ParmVarDecl>());
BD->setParams(Params);
BD->setIsVariadic(Record.readInt());
BD->setBlockMissingReturnType(Record.readInt());
BD->setIsConversionFromLambda(Record.readInt());
bool capturesCXXThis = Record.readInt();
unsigned numCaptures = Record.readInt();
SmallVector<BlockDecl::Capture, 16> captures;
captures.reserve(numCaptures);
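// Each capture is serialized as a VarDecl reference followed by a flags
// word: bit 0 = captured by reference, bit 1 = nested capture, and bit 2
// signals that a copy expression follows in the record.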
for (unsigned i = 0; i != numCaptures; ++i) {
VarDecl *decl = ReadDeclAs<VarDecl>();
unsigned flags = Record.readInt();
bool byRef = (flags & 1);
bool nested = (flags & 2);
Expr *copyExpr = ((flags & 4) ? Record.readExpr() : nullptr);
captures.push_back(BlockDecl::Capture(decl, byRef, nested, copyExpr));
}
BD->setCaptures(Reader.getContext(), captures, capturesCXXThis);
}
void ASTDeclReader::VisitCapturedDecl(CapturedDecl *CD) {
VisitDecl(CD);
unsigned ContextParamPos = Record.readInt();
CD->setNothrow(Record.readInt() != 0);
// Body is set by VisitCapturedStmt.
for (unsigned I = 0; I < CD->NumParams; ++I) {
if (I != ContextParamPos)
CD->setParam(I, ReadDeclAs<ImplicitParamDecl>());
else
CD->setContextParam(I, ReadDeclAs<ImplicitParamDecl>());
}
}
void ASTDeclReader::VisitLinkageSpecDecl(LinkageSpecDecl *D) {
VisitDecl(D);
D->setLanguage((LinkageSpecDecl::LanguageIDs)Record.readInt());
D->setExternLoc(ReadSourceLocation());
D->setRBraceLoc(ReadSourceLocation());
}
void ASTDeclReader::VisitExportDecl(ExportDecl *D) {
VisitDecl(D);
D->RBraceLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitLabelDecl(LabelDecl *D) {
VisitNamedDecl(D);
D->setLocStart(ReadSourceLocation());
}
void ASTDeclReader::VisitNamespaceDecl(NamespaceDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->setInline(Record.readInt());
D->LocStart = ReadSourceLocation();
D->RBraceLoc = ReadSourceLocation();
// Defer loading the anonymous namespace until we've finished merging
// this namespace; loading it might load a later declaration of the
// same namespace, and we have an invariant that older declarations
// get merged before newer ones try to merge.
GlobalDeclID AnonNamespace = 0;
if (Redecl.getFirstID() == ThisDeclID) {
AnonNamespace = ReadDeclID();
} else {
// Link this namespace back to the first declaration, which has already
// been deserialized.
D->AnonOrFirstNamespaceAndInline.setPointer(D->getFirstDecl());
}
mergeRedeclarable(D, Redecl);
if (AnonNamespace) {
// Each module has its own anonymous namespace, which is disjoint from
// any other module's anonymous namespaces, so don't attach the anonymous
// namespace at all.
NamespaceDecl *Anon = cast<NamespaceDecl>(Reader.GetDecl(AnonNamespace));
if (!Record.isModule())
D->setAnonymousNamespace(Anon);
}
}
void ASTDeclReader::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->NamespaceLoc = ReadSourceLocation();
D->IdentLoc = ReadSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->Namespace = ReadDeclAs<NamedDecl>();
mergeRedeclarable(D, Redecl);
}
void ASTDeclReader::VisitUsingDecl(UsingDecl *D) {
VisitNamedDecl(D);
D->setUsingLoc(ReadSourceLocation());
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
ReadDeclarationNameLoc(D->DNLoc, D->getDeclName());
D->FirstUsingShadow.setPointer(ReadDeclAs<UsingShadowDecl>());
D->setTypename(Record.readInt());
if (NamedDecl *Pattern = ReadDeclAs<NamedDecl>())
Reader.getContext().setInstantiatedFromUsingDecl(D, Pattern);
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingPackDecl(UsingPackDecl *D) {
VisitNamedDecl(D);
D->InstantiatedFrom = ReadDeclAs<NamedDecl>();
NamedDecl **Expansions = D->getTrailingObjects<NamedDecl*>();
for (unsigned I = 0; I != D->NumExpansions; ++I)
Expansions[I] = ReadDeclAs<NamedDecl>();
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->setTargetDecl(ReadDeclAs<NamedDecl>());
D->UsingOrNextShadow = ReadDeclAs<NamedDecl>();
UsingShadowDecl *Pattern = ReadDeclAs<UsingShadowDecl>();
if (Pattern)
Reader.getContext().setInstantiatedFromUsingShadowDecl(D, Pattern);
mergeRedeclarable(D, Redecl);
}
void ASTDeclReader::VisitConstructorUsingShadowDecl(
ConstructorUsingShadowDecl *D) {
VisitUsingShadowDecl(D);
D->NominatedBaseClassShadowDecl = ReadDeclAs<ConstructorUsingShadowDecl>();
D->ConstructedBaseClassShadowDecl = ReadDeclAs<ConstructorUsingShadowDecl>();
D->IsVirtual = Record.readInt();
}
void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
VisitNamedDecl(D);
D->UsingLoc = ReadSourceLocation();
D->NamespaceLoc = ReadSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->NominatedNamespace = ReadDeclAs<NamedDecl>();
D->CommonAncestor = ReadDeclAs<DeclContext>();
}
void ASTDeclReader::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) {
VisitValueDecl(D);
D->setUsingLoc(ReadSourceLocation());
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
ReadDeclarationNameLoc(D->DNLoc, D->getDeclName());
D->EllipsisLoc = ReadSourceLocation();
mergeMergeable(D);
}
void ASTDeclReader::VisitUnresolvedUsingTypenameDecl(
UnresolvedUsingTypenameDecl *D) {
VisitTypeDecl(D);
D->TypenameLocation = ReadSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->EllipsisLoc = ReadSourceLocation();
mergeMergeable(D);
}
void ASTDeclReader::ReadCXXDefinitionData(
struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D) {
// Note: the caller has deserialized the IsLambda bit already.
Data.UserDeclaredConstructor = Record.readInt();
Data.UserDeclaredSpecialMembers = Record.readInt();
Data.Aggregate = Record.readInt();
Data.PlainOldData = Record.readInt();
Data.Empty = Record.readInt();
Data.Polymorphic = Record.readInt();
Data.Abstract = Record.readInt();
Data.IsStandardLayout = Record.readInt();
Data.HasNoNonEmptyBases = Record.readInt();
Data.HasPrivateFields = Record.readInt();
Data.HasProtectedFields = Record.readInt();
Data.HasPublicFields = Record.readInt();
Data.HasMutableFields = Record.readInt();
Data.HasVariantMembers = Record.readInt();
Data.HasOnlyCMembers = Record.readInt();
Data.HasInClassInitializer = Record.readInt();
Data.HasUninitializedReferenceMember = Record.readInt();
Data.HasUninitializedFields = Record.readInt();
Data.HasInheritedConstructor = Record.readInt();
Data.HasInheritedAssignment = Record.readInt();
Data.NeedOverloadResolutionForCopyConstructor = Record.readInt();
Data.NeedOverloadResolutionForMoveConstructor = Record.readInt();
Data.NeedOverloadResolutionForMoveAssignment = Record.readInt();
Data.NeedOverloadResolutionForDestructor = Record.readInt();
Data.DefaultedCopyConstructorIsDeleted = Record.readInt();
Data.DefaultedMoveConstructorIsDeleted = Record.readInt();
Data.DefaultedMoveAssignmentIsDeleted = Record.readInt();
Data.DefaultedDestructorIsDeleted = Record.readInt();
Data.HasTrivialSpecialMembers = Record.readInt();
Data.DeclaredNonTrivialSpecialMembers = Record.readInt();
Data.HasIrrelevantDestructor = Record.readInt();
Data.HasConstexprNonCopyMoveConstructor = Record.readInt();
Data.HasDefaultedDefaultConstructor = Record.readInt();
Data.CanPassInRegisters = Record.readInt();
Data.DefaultedDefaultConstructorIsConstexpr = Record.readInt();
Data.HasConstexprDefaultConstructor = Record.readInt();
Data.HasNonLiteralTypeFieldsOrBases = Record.readInt();
Data.ComputedVisibleConversions = Record.readInt();
Data.UserProvidedDefaultConstructor = Record.readInt();
Data.DeclaredSpecialMembers = Record.readInt();
Data.ImplicitCopyConstructorCanHaveConstParamForVBase = Record.readInt();
Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase = Record.readInt();
Data.ImplicitCopyAssignmentHasConstParam = Record.readInt();
Data.HasDeclaredCopyConstructorWithConstParam = Record.readInt();
Data.HasDeclaredCopyAssignmentWithConstParam = Record.readInt();
Data.ODRHash = Record.readInt();
Data.HasODRHash = true;
if (Record.readInt()) {
Reader.BodySource[D] = Loc.F->Kind == ModuleKind::MK_MainFile
? ExternalASTSource::EK_Never
: ExternalASTSource::EK_Always;
}
Data.NumBases = Record.readInt();
if (Data.NumBases)
Data.Bases = ReadGlobalOffset();
Data.NumVBases = Record.readInt();
if (Data.NumVBases)
Data.VBases = ReadGlobalOffset();
Record.readUnresolvedSet(Data.Conversions);
Record.readUnresolvedSet(Data.VisibleConversions);
assert(Data.Definition && "Data.Definition should be already set!");
Data.FirstFriend = ReadDeclID();
if (Data.IsLambda) {
typedef LambdaCapture Capture;
CXXRecordDecl::LambdaDefinitionData &Lambda
= static_cast<CXXRecordDecl::LambdaDefinitionData &>(Data);
Lambda.Dependent = Record.readInt();
Lambda.IsGenericLambda = Record.readInt();
Lambda.CaptureDefault = Record.readInt();
Lambda.NumCaptures = Record.readInt();
Lambda.NumExplicitCaptures = Record.readInt();
Lambda.ManglingNumber = Record.readInt();
Lambda.ContextDecl = ReadDeclID();
Lambda.Captures = (Capture *)Reader.getContext().Allocate(
sizeof(Capture) * Lambda.NumCaptures);
Capture *ToCapture = Lambda.Captures;
Lambda.MethodTyInfo = GetTypeSourceInfo();
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
SourceLocation Loc = ReadSourceLocation();
bool IsImplicit = Record.readInt();
LambdaCaptureKind Kind = static_cast<LambdaCaptureKind>(Record.readInt());
switch (Kind) {
case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
*ToCapture++ = Capture(Loc, IsImplicit, Kind, nullptr, SourceLocation());
break;
case LCK_ByCopy:
case LCK_ByRef:
VarDecl *Var = ReadDeclAs<VarDecl>();
SourceLocation EllipsisLoc = ReadSourceLocation();
*ToCapture++ = Capture(Loc, IsImplicit, Kind, Var, EllipsisLoc);
break;
}
}
}
}
void ASTDeclReader::MergeDefinitionData(
CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) {
assert(D->DefinitionData &&
"merging class definition into non-definition");
auto &DD = *D->DefinitionData;
if (DD.Definition != MergeDD.Definition) {
// Track that we merged the definitions.
Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition,
DD.Definition));
Reader.PendingDefinitions.erase(MergeDD.Definition);
MergeDD.Definition->IsCompleteDefinition = false;
Reader.mergeDefinitionVisibility(DD.Definition, MergeDD.Definition);
assert(Reader.Lookups.find(MergeDD.Definition) == Reader.Lookups.end() &&
"already loaded pending lookups for merged definition");
}
auto PFDI = Reader.PendingFakeDefinitionData.find(&DD);
if (PFDI != Reader.PendingFakeDefinitionData.end() &&
PFDI->second == ASTReader::PendingFakeDefinitionKind::Fake) {
// We faked up this definition data because we found a class for which we'd
// not yet loaded the definition. Replace it with the real thing now.
assert(!DD.IsLambda && !MergeDD.IsLambda && "faked up lambda definition?");
PFDI->second = ASTReader::PendingFakeDefinitionKind::FakeLoaded;
// Don't change which declaration is the definition; that is required
// to be invariant once we select it.
auto *Def = DD.Definition;
DD = std::move(MergeDD);
DD.Definition = Def;
return;
}
// FIXME: Move this out into a .def file?
bool DetectedOdrViolation = false;
#define OR_FIELD(Field) DD.Field |= MergeDD.Field;
#define MATCH_FIELD(Field) \
DetectedOdrViolation |= DD.Field != MergeDD.Field; \
OR_FIELD(Field)
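// OR_FIELD merges a flag permissively: the merged definition has it set if
// either definition did. MATCH_FIELD does the same but also records an ODR
// violation when the two definitions disagree.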
MATCH_FIELD(UserDeclaredConstructor)
MATCH_FIELD(UserDeclaredSpecialMembers)
MATCH_FIELD(Aggregate)
MATCH_FIELD(PlainOldData)
MATCH_FIELD(Empty)
MATCH_FIELD(Polymorphic)
MATCH_FIELD(Abstract)
MATCH_FIELD(IsStandardLayout)
MATCH_FIELD(HasNoNonEmptyBases)
MATCH_FIELD(HasPrivateFields)
MATCH_FIELD(HasProtectedFields)
MATCH_FIELD(HasPublicFields)
MATCH_FIELD(HasMutableFields)
MATCH_FIELD(HasVariantMembers)
MATCH_FIELD(HasOnlyCMembers)
MATCH_FIELD(HasInClassInitializer)
MATCH_FIELD(HasUninitializedReferenceMember)
MATCH_FIELD(HasUninitializedFields)
MATCH_FIELD(HasInheritedConstructor)
MATCH_FIELD(HasInheritedAssignment)
MATCH_FIELD(NeedOverloadResolutionForCopyConstructor)
MATCH_FIELD(NeedOverloadResolutionForMoveConstructor)
MATCH_FIELD(NeedOverloadResolutionForMoveAssignment)
MATCH_FIELD(NeedOverloadResolutionForDestructor)
MATCH_FIELD(DefaultedCopyConstructorIsDeleted)
MATCH_FIELD(DefaultedMoveConstructorIsDeleted)
MATCH_FIELD(DefaultedMoveAssignmentIsDeleted)
MATCH_FIELD(DefaultedDestructorIsDeleted)
OR_FIELD(HasTrivialSpecialMembers)
OR_FIELD(DeclaredNonTrivialSpecialMembers)
MATCH_FIELD(HasIrrelevantDestructor)
OR_FIELD(HasConstexprNonCopyMoveConstructor)
OR_FIELD(HasDefaultedDefaultConstructor)
MATCH_FIELD(CanPassInRegisters)
MATCH_FIELD(DefaultedDefaultConstructorIsConstexpr)
OR_FIELD(HasConstexprDefaultConstructor)
MATCH_FIELD(HasNonLiteralTypeFieldsOrBases)
// ComputedVisibleConversions is handled below.
MATCH_FIELD(UserProvidedDefaultConstructor)
OR_FIELD(DeclaredSpecialMembers)
MATCH_FIELD(ImplicitCopyConstructorCanHaveConstParamForVBase)
MATCH_FIELD(ImplicitCopyConstructorCanHaveConstParamForNonVBase)
MATCH_FIELD(ImplicitCopyAssignmentHasConstParam)
OR_FIELD(HasDeclaredCopyConstructorWithConstParam)
OR_FIELD(HasDeclaredCopyAssignmentWithConstParam)
MATCH_FIELD(IsLambda)
#undef OR_FIELD
#undef MATCH_FIELD
if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases)
DetectedOdrViolation = true;
// FIXME: Issue a diagnostic if the base classes don't match when we come
// to lazily load them.
// FIXME: Issue a diagnostic if the list of conversion functions doesn't
// match when we come to lazily load them.
if (MergeDD.ComputedVisibleConversions && !DD.ComputedVisibleConversions) {
DD.VisibleConversions = std::move(MergeDD.VisibleConversions);
DD.ComputedVisibleConversions = true;
}
// FIXME: Issue a diagnostic if FirstFriend doesn't match when we come to
// lazily load it.
if (DD.IsLambda) {
// FIXME: ODR-checking for merging lambdas (this happens, for instance,
// when they occur within the body of a function template specialization).
}
if (D->getODRHash() != MergeDD.ODRHash) {
DetectedOdrViolation = true;
}
if (DetectedOdrViolation)
Reader.PendingOdrMergeFailures[DD.Definition].push_back(MergeDD.Definition);
}
void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update) {
struct CXXRecordDecl::DefinitionData *DD;
ASTContext &C = Reader.getContext();
// Determine whether this is a lambda closure type, so that we can
// allocate the appropriate DefinitionData structure.
bool IsLambda = Record.readInt();
if (IsLambda)
DD = new (C) CXXRecordDecl::LambdaDefinitionData(D, nullptr, false, false,
LCD_None);
else
DD = new (C) struct CXXRecordDecl::DefinitionData(D);
ReadCXXDefinitionData(*DD, D);
// We might already have a definition for this record. This can happen either
// because we're reading an update record, or because we've already done some
// merging. Either way, just merge into it.
CXXRecordDecl *Canon = D->getCanonicalDecl();
if (Canon->DefinitionData) {
MergeDefinitionData(Canon, std::move(*DD));
D->DefinitionData = Canon->DefinitionData;
return;
}
// Mark this declaration as being a definition.
D->IsCompleteDefinition = true;
D->DefinitionData = DD;
// If this is not the first declaration or is an update record, we can have
// other redeclarations already. Make a note that we need to propagate the
// DefinitionData pointer onto them.
if (Update || Canon != D) {
Canon->DefinitionData = D->DefinitionData;
Reader.PendingDefinitions.insert(D);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) {
RedeclarableResult Redecl = VisitRecordDeclImpl(D);
ASTContext &C = Reader.getContext();
enum CXXRecKind {
CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization
};
switch ((CXXRecKind)Record.readInt()) {
case CXXRecNotTemplate:
// Merged when we merge the folding set entry in the primary template.
if (!isa<ClassTemplateSpecializationDecl>(D))
mergeRedeclarable(D, Redecl);
break;
case CXXRecTemplate: {
// Merged when we merge the template.
ClassTemplateDecl *Template = ReadDeclAs<ClassTemplateDecl>();
D->TemplateOrInstantiation = Template;
if (!Template->getTemplatedDecl()) {
// We've not actually loaded the ClassTemplateDecl yet, because we're
// currently being loaded as its pattern. Rely on it to set up our
// TypeForDecl (see VisitClassTemplateDecl).
//
// Beware: we do not yet know our canonical declaration, and may still
// get merged once the surrounding class template has got off the ground.
TypeIDForTypeDecl = 0;
}
break;
}
case CXXRecMemberSpecialization: {
CXXRecordDecl *RD = ReadDeclAs<CXXRecordDecl>();
TemplateSpecializationKind TSK =
(TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
MemberSpecializationInfo *MSI = new (C) MemberSpecializationInfo(RD, TSK);
MSI->setPointOfInstantiation(POI);
D->TemplateOrInstantiation = MSI;
mergeRedeclarable(D, Redecl);
break;
}
}
bool WasDefinition = Record.readInt();
if (WasDefinition)
ReadCXXRecordDefinition(D, /*Update*/false);
else
// Propagate DefinitionData pointer from the canonical declaration.
D->DefinitionData = D->getCanonicalDecl()->DefinitionData;
// Lazily load the key function to avoid deserializing every method so we can
// compute it.
if (WasDefinition) {
DeclID KeyFn = ReadDeclID();
if (KeyFn && D->IsCompleteDefinition)
// FIXME: This is wrong for the ARM ABI, where some other module may have
// made this function no longer be a key function. We need an update
// record or similar for that case.
C.KeyFunctions[D] = KeyFn;
}
return Redecl;
}
void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
VisitFunctionDecl(D);
}
void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) {
VisitFunctionDecl(D);
unsigned NumOverridenMethods = Record.readInt();
if (D->isCanonicalDecl()) {
while (NumOverridenMethods--) {
// Avoid the invariant checking of CXXMethodDecl::addOverriddenMethod,
// as MD may still be initializing.
if (CXXMethodDecl *MD = ReadDeclAs<CXXMethodDecl>())
Reader.getContext().addOverriddenMethod(D, MD->getCanonicalDecl());
}
} else {
// We don't care about which declarations this used to override; we get
// the relevant information from the canonical declaration.
Record.skipInts(NumOverridenMethods);
}
}
void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
// We need the inherited constructor information to merge the declaration,
// so we have to read it before we call VisitCXXMethodDecl.
if (D->isInheritingConstructor()) {
auto *Shadow = ReadDeclAs<ConstructorUsingShadowDecl>();
auto *Ctor = ReadDeclAs<CXXConstructorDecl>();
*D->getTrailingObjects<InheritedConstructor>() =
InheritedConstructor(Shadow, Ctor);
}
VisitCXXMethodDecl(D);
}
void ASTDeclReader::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
VisitCXXMethodDecl(D);
if (auto *OperatorDelete = ReadDeclAs<FunctionDecl>()) {
auto *Canon = cast<CXXDestructorDecl>(D->getCanonicalDecl());
// FIXME: Check consistency if we have an old and new operator delete.
if (!Canon->OperatorDelete)
Canon->OperatorDelete = OperatorDelete;
}
}
void ASTDeclReader::VisitCXXConversionDecl(CXXConversionDecl *D) {
VisitCXXMethodDecl(D);
}
void ASTDeclReader::VisitImportDecl(ImportDecl *D) {
VisitDecl(D);
D->ImportedAndComplete.setPointer(readModule());
D->ImportedAndComplete.setInt(Record.readInt());
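// The count of stored source locations sits at the end of the record;
// peek at it with Record.back() and skip over it once the loop is done.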
SourceLocation *StoredLocs = D->getTrailingObjects<SourceLocation>();
for (unsigned I = 0, N = Record.back(); I != N; ++I)
StoredLocs[I] = ReadSourceLocation();
Record.skipInts(1); // The number of stored source locations.
}
void ASTDeclReader::VisitAccessSpecDecl(AccessSpecDecl *D) {
VisitDecl(D);
D->setColonLoc(ReadSourceLocation());
}
void ASTDeclReader::VisitFriendDecl(FriendDecl *D) {
VisitDecl(D);
if (Record.readInt()) // hasFriendDecl
D->Friend = ReadDeclAs<NamedDecl>();
else
D->Friend = GetTypeSourceInfo();
for (unsigned i = 0; i != D->NumTPLists; ++i)
D->getTrailingObjects<TemplateParameterList *>()[i] =
Record.readTemplateParameterList();
D->NextFriend = ReadDeclID();
D->UnsupportedFriend = (Record.readInt() != 0);
D->FriendLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) {
VisitDecl(D);
unsigned NumParams = Record.readInt();
D->NumParams = NumParams;
D->Params = new TemplateParameterList*[NumParams];
for (unsigned i = 0; i != NumParams; ++i)
D->Params[i] = Record.readTemplateParameterList();
if (Record.readInt()) // HasFriendDecl
D->Friend = ReadDeclAs<NamedDecl>();
else
D->Friend = GetTypeSourceInfo();
D->FriendLoc = ReadSourceLocation();
}
DeclID ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) {
VisitNamedDecl(D);
DeclID PatternID = ReadDeclID();
NamedDecl *TemplatedDecl = cast_or_null<NamedDecl>(Reader.GetDecl(PatternID));
TemplateParameterList *TemplateParams = Record.readTemplateParameterList();
// FIXME handle associated constraints
D->init(TemplatedDecl, TemplateParams);
return PatternID;
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
// Make sure we've allocated the Common pointer first. We do this before
// VisitTemplateDecl so that getCommonPtr() can be used during initialization.
RedeclarableTemplateDecl *CanonD = D->getCanonicalDecl();
if (!CanonD->Common) {
CanonD->Common = CanonD->newCommon(Reader.getContext());
Reader.PendingDefinitions.insert(CanonD);
}
D->Common = CanonD->Common;
// If this is the first declaration of the template, fill in the information
// for the 'common' pointer.
if (ThisDeclID == Redecl.getFirstID()) {
if (RedeclarableTemplateDecl *RTD
= ReadDeclAs<RedeclarableTemplateDecl>()) {
assert(RTD->getKind() == D->getKind() &&
"InstantiatedFromMemberTemplate kind mismatch");
D->setInstantiatedFromMemberTemplate(RTD);
if (Record.readInt())
D->setMemberSpecialization();
}
}
DeclID PatternID = VisitTemplateDecl(D);
D->IdentifierNamespace = Record.readInt();
mergeRedeclarable(D, Redecl, PatternID);
// If we merged the template with a prior declaration chain, merge the common
// pointer.
// FIXME: Actually merge here, don't just overwrite.
D->Common = D->getCanonicalDecl()->Common;
return Redecl;
}
void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This ClassTemplateDecl owns a CommonPtr; read it to keep track of all of
// the specializations.
SmallVector<serialization::DeclID, 32> SpecIDs;
ReadDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
if (D->getTemplatedDecl()->TemplateOrInstantiation) {
// We were loaded before our templated declaration was. We've not set up
// its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct
// it now.
Reader.getContext().getInjectedClassNameType(
D->getTemplatedDecl(), D->getInjectedClassNameSpecialization());
}
}
void ASTDeclReader::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) {
llvm_unreachable("BuiltinTemplates are not serialized");
}
/// TODO: Unify with ClassTemplateDecl version?
/// May require unifying ClassTemplateDecl and
/// VarTemplateDecl beyond TemplateDecl...
void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This VarTemplateDecl owns a CommonPtr; read it to keep track of all of
// the specializations.
SmallVector<serialization::DeclID, 32> SpecIDs;
ReadDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
ClassTemplateSpecializationDecl *D) {
RedeclarableResult Redecl = VisitCXXRecordDeclImpl(D);
ASTContext &C = Reader.getContext();
if (Decl *InstD = ReadDecl()) {
if (ClassTemplateDecl *CTD = dyn_cast<ClassTemplateDecl>(InstD)) {
D->SpecializedTemplate = CTD;
} else {
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
TemplateArgumentList *ArgList
= TemplateArgumentList::CreateCopy(C, TemplArgs);
ClassTemplateSpecializationDecl::SpecializedPartialSpecialization *PS
= new (C) ClassTemplateSpecializationDecl::
SpecializedPartialSpecialization();
PS->PartialSpecialization
= cast<ClassTemplatePartialSpecializationDecl>(InstD);
PS->TemplateArgs = ArgList;
D->SpecializedTemplate = PS;
}
}
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = ReadSourceLocation();
D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
bool writtenAsCanonicalDecl = Record.readInt();
if (writtenAsCanonicalDecl) {
ClassTemplateDecl *CanonPattern = ReadDeclAs<ClassTemplateDecl>();
if (D->isCanonicalDecl()) { // It's kept in the folding set.
// Set this as, or find, the canonical declaration for this specialization
ClassTemplateSpecializationDecl *CanonSpec;
if (ClassTemplatePartialSpecializationDecl *Partial =
dyn_cast<ClassTemplatePartialSpecializationDecl>(D)) {
CanonSpec = CanonPattern->getCommonPtr()->PartialSpecializations
.GetOrInsertNode(Partial);
} else {
CanonSpec =
CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D);
}
// If there was already a canonical specialization, merge into it.
if (CanonSpec != D) {
mergeRedeclarable<TagDecl>(D, CanonSpec, Redecl);
// This declaration might be a definition. Merge with any existing
// definition.
if (auto *DDD = D->DefinitionData) {
if (CanonSpec->DefinitionData)
MergeDefinitionData(CanonSpec, std::move(*DDD));
else
CanonSpec->DefinitionData = D->DefinitionData;
}
D->DefinitionData = CanonSpec->DefinitionData;
}
}
}
// Explicit info.
if (TypeSourceInfo *TyInfo = GetTypeSourceInfo()) {
ClassTemplateSpecializationDecl::ExplicitSpecializationInfo *ExplicitInfo
= new (C) ClassTemplateSpecializationDecl::ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = TyInfo;
ExplicitInfo->ExternLoc = ReadSourceLocation();
ExplicitInfo->TemplateKeywordLoc = ReadSourceLocation();
D->ExplicitInfo = ExplicitInfo;
}
return Redecl;
}
void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D) {
RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D);
D->TemplateParams = Record.readTemplateParameterList();
D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
// These are read/set from/to the first declaration.
if (ThisDeclID == Redecl.getFirstID()) {
D->InstantiatedFromMember.setPointer(
ReadDeclAs<ClassTemplatePartialSpecializationDecl>());
D->InstantiatedFromMember.setInt(Record.readInt());
}
}
void ASTDeclReader::VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D) {
VisitDecl(D);
D->Specialization = ReadDeclAs<CXXMethodDecl>();
}
void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This FunctionTemplateDecl owns a CommonPtr; read it.
SmallVector<serialization::DeclID, 32> SpecIDs;
ReadDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
}
/// TODO: Unify with ClassTemplateSpecializationDecl version?
/// May require unifying ClassTemplate(Partial)SpecializationDecl and
/// VarTemplate(Partial)SpecializationDecl with a new data
/// structure Template(Partial)SpecializationDecl, and
/// using Template(Partial)SpecializationDecl as input type.
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitVarTemplateSpecializationDeclImpl(
VarTemplateSpecializationDecl *D) {
RedeclarableResult Redecl = VisitVarDeclImpl(D);
ASTContext &C = Reader.getContext();
if (Decl *InstD = ReadDecl()) {
if (VarTemplateDecl *VTD = dyn_cast<VarTemplateDecl>(InstD)) {
D->SpecializedTemplate = VTD;
} else {
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(
C, TemplArgs);
VarTemplateSpecializationDecl::SpecializedPartialSpecialization *PS =
new (C)
VarTemplateSpecializationDecl::SpecializedPartialSpecialization();
PS->PartialSpecialization =
cast<VarTemplatePartialSpecializationDecl>(InstD);
PS->TemplateArgs = ArgList;
D->SpecializedTemplate = PS;
}
}
// Explicit info.
if (TypeSourceInfo *TyInfo = GetTypeSourceInfo()) {
VarTemplateSpecializationDecl::ExplicitSpecializationInfo *ExplicitInfo =
new (C) VarTemplateSpecializationDecl::ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = TyInfo;
ExplicitInfo->ExternLoc = ReadSourceLocation();
ExplicitInfo->TemplateKeywordLoc = ReadSourceLocation();
D->ExplicitInfo = ExplicitInfo;
}
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = ReadSourceLocation();
D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
bool writtenAsCanonicalDecl = Record.readInt();
if (writtenAsCanonicalDecl) {
VarTemplateDecl *CanonPattern = ReadDeclAs<VarTemplateDecl>();
if (D->isCanonicalDecl()) { // It's kept in the folding set.
// FIXME: If it's already present, merge it.
if (VarTemplatePartialSpecializationDecl *Partial =
dyn_cast<VarTemplatePartialSpecializationDecl>(D)) {
CanonPattern->getCommonPtr()->PartialSpecializations
.GetOrInsertNode(Partial);
} else {
CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D);
}
}
}
return Redecl;
}
/// TODO: Unify with ClassTemplatePartialSpecializationDecl version?
/// May require unifying ClassTemplate(Partial)SpecializationDecl and
/// VarTemplate(Partial)SpecializationDecl with a new data
/// structure Template(Partial)SpecializationDecl, and
/// using Template(Partial)SpecializationDecl as input type.
void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D) {
RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D);
D->TemplateParams = Record.readTemplateParameterList();
D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
// These are read/set from/to the first declaration.
if (ThisDeclID == Redecl.getFirstID()) {
D->InstantiatedFromMember.setPointer(
ReadDeclAs<VarTemplatePartialSpecializationDecl>());
D->InstantiatedFromMember.setInt(Record.readInt());
}
}
void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
VisitTypeDecl(D);
D->setDeclaredWithTypename(Record.readInt());
if (Record.readInt())
D->setDefaultArgument(GetTypeSourceInfo());
}
void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
VisitDeclaratorDecl(D);
// TemplateParmPosition.
D->setDepth(Record.readInt());
D->setPosition(Record.readInt());
if (D->isExpandedParameterPack()) {
auto TypesAndInfos =
D->getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
new (&TypesAndInfos[I].first) QualType(Record.readType());
TypesAndInfos[I].second = GetTypeSourceInfo();
}
} else {
// Rest of NonTypeTemplateParmDecl.
D->ParameterPack = Record.readInt();
if (Record.readInt())
D->setDefaultArgument(Record.readExpr());
}
}
void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
VisitTemplateDecl(D);
// TemplateParmPosition.
D->setDepth(Record.readInt());
D->setPosition(Record.readInt());
if (D->isExpandedParameterPack()) {
TemplateParameterList **Data =
D->getTrailingObjects<TemplateParameterList *>();
for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
I != N; ++I)
Data[I] = Record.readTemplateParameterList();
} else {
// Rest of TemplateTemplateParmDecl.
D->ParameterPack = Record.readInt();
if (Record.readInt())
D->setDefaultArgument(Reader.getContext(),
Record.readTemplateArgumentLoc());
}
}
void ASTDeclReader::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
VisitRedeclarableTemplateDecl(D);
}
void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) {
VisitDecl(D);
D->AssertExprAndFailed.setPointer(Record.readExpr());
D->AssertExprAndFailed.setInt(Record.readInt());
D->Message = cast_or_null<StringLiteral>(Record.readExpr());
D->RParenLoc = ReadSourceLocation();
}
void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) {
VisitDecl(D);
}
std::pair<uint64_t, uint64_t>
ASTDeclReader::VisitDeclContext(DeclContext *DC) {
uint64_t LexicalOffset = ReadLocalOffset();
uint64_t VisibleOffset = ReadLocalOffset();
return std::make_pair(LexicalOffset, VisibleOffset);
}
template <typename T>
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarable(Redeclarable<T> *D) {
DeclID FirstDeclID = ReadDeclID();
Decl *MergeWith = nullptr;
bool IsKeyDecl = ThisDeclID == FirstDeclID;
bool IsFirstLocalDecl = false;
uint64_t RedeclOffset = 0;
// A FirstDeclID of 0 indicates that this declaration was the only declaration
// of its entity; it is encoded that way as a space optimization.
if (FirstDeclID == 0) {
FirstDeclID = ThisDeclID;
IsKeyDecl = true;
IsFirstLocalDecl = true;
} else if (unsigned N = Record.readInt()) {
// This declaration was the first local declaration, but might have been
// preceded by imported declarations in its redeclaration chain.
IsKeyDecl = N == 1;
IsFirstLocalDecl = true;
// We have some declarations that must be before us in our redeclaration
// chain. Read them now, and remember that we ought to merge with one of
// them.
// FIXME: Provide a known merge target to the second and subsequent such
// declaration.
for (unsigned I = 0; I != N - 1; ++I)
MergeWith = ReadDecl();
RedeclOffset = ReadLocalOffset();
} else {
// This declaration was not the first local declaration. Read the first
// local declaration now, to trigger the import of other redeclarations.
(void)ReadDecl();
}
T *FirstDecl = cast_or_null<T>(Reader.GetDecl(FirstDeclID));
if (FirstDecl != D) {
// We delay loading of the redeclaration chain to avoid deeply nested calls.
// We temporarily set the first (canonical) declaration as the previous one,
// which is the one that matters, and mark the real previous DeclID to be
// loaded and attached later on.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(FirstDecl);
D->First = FirstDecl->getCanonicalDecl();
}
T *DAsT = static_cast<T*>(D);
// Note that we need to load local redeclarations of this decl and build a
// decl chain for them. This must happen *after* we perform the preloading
// above; this ensures that the redeclaration chain is built in the correct
// order.
if (IsFirstLocalDecl)
Reader.PendingDeclChains.push_back(std::make_pair(DAsT, RedeclOffset));
return RedeclarableResult(MergeWith, FirstDeclID, IsKeyDecl);
}
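// The three encodings of the leading redeclaration field handled above, as an
// inferred sketch (not a normative grammar):
//
//   [0]                                   sole declaration of its entity
//   [FirstID][N][Decl_1..Decl_{N-1}][Off] first local declaration; the N-1
//                                         preceding imported declarations are
//                                         read as merge candidates, and Off is
//                                         the local redeclaration-chain offset
//   [FirstID][0][FirstLocalRef]           not the first local declaration;
//                                         reading FirstLocalRef triggers the
//                                         import of the rest of the chain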
/// \brief Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template<typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase,
RedeclarableResult &Redecl,
DeclID TemplatePatternID) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
// If we're not the canonical declaration, we don't need to merge.
if (!DBase->isFirstDecl())
return;
T *D = static_cast<T*>(DBase);
if (auto *Existing = Redecl.getKnownMergeTarget())
// We already know of an existing declaration we should merge with.
mergeRedeclarable(D, cast<T>(Existing), Redecl, TemplatePatternID);
else if (FindExistingResult ExistingRes = findExisting(D))
if (T *Existing = ExistingRes)
mergeRedeclarable(D, Existing, Redecl, TemplatePatternID);
}
/// \brief "Cast" to type T, asserting if we don't have an implicit conversion.
/// We use this to put code in a template that will only be valid for certain
/// instantiations.
template<typename T> static T assert_cast(T t) { return t; }
template<typename T> static T assert_cast(...) {
llvm_unreachable("bad assert_cast");
}
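// Illustrative use of assert_cast, matching its call sites in
// mergeRedeclarable below: for T = NamespaceDecl the pointer conversion
// exists, so the identity overload is selected at compile time; for any other
// T the variadic overload is chosen instead, and that branch is dynamically
// dead because of the dyn_cast guard:
//
//   if (auto *Namespace = dyn_cast<NamespaceDecl>(D))
//     ... assert_cast<NamespaceDecl*>(ExistingCanon) ...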
/// \brief Merge together the pattern declarations from two template
/// declarations.
void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D,
RedeclarableTemplateDecl *Existing,
DeclID DsID, bool IsKeyDecl) {
auto *DPattern = D->getTemplatedDecl();
auto *ExistingPattern = Existing->getTemplatedDecl();
RedeclarableResult Result(/*MergeWith*/ ExistingPattern,
DPattern->getCanonicalDecl()->getGlobalID(),
IsKeyDecl);
if (auto *DClass = dyn_cast<CXXRecordDecl>(DPattern)) {
// Merge with any existing definition.
// FIXME: This is duplicated in several places. Refactor.
auto *ExistingClass =
cast<CXXRecordDecl>(ExistingPattern)->getCanonicalDecl();
if (auto *DDD = DClass->DefinitionData) {
if (ExistingClass->DefinitionData) {
MergeDefinitionData(ExistingClass, std::move(*DDD));
} else {
ExistingClass->DefinitionData = DClass->DefinitionData;
// We may have skipped this before because we thought that DClass
// was the canonical declaration.
Reader.PendingDefinitions.insert(DClass);
}
}
DClass->DefinitionData = ExistingClass->DefinitionData;
return mergeRedeclarable(DClass, cast<TagDecl>(ExistingPattern),
Result);
}
if (auto *DFunction = dyn_cast<FunctionDecl>(DPattern))
return mergeRedeclarable(DFunction, cast<FunctionDecl>(ExistingPattern),
Result);
if (auto *DVar = dyn_cast<VarDecl>(DPattern))
return mergeRedeclarable(DVar, cast<VarDecl>(ExistingPattern), Result);
if (auto *DAlias = dyn_cast<TypeAliasDecl>(DPattern))
return mergeRedeclarable(DAlias, cast<TypedefNameDecl>(ExistingPattern),
Result);
llvm_unreachable("merged an unknown kind of redeclarable template");
}
/// \brief Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template<typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase, T *Existing,
RedeclarableResult &Redecl,
DeclID TemplatePatternID) {
T *D = static_cast<T*>(DBase);
T *ExistingCanon = Existing->getCanonicalDecl();
T *DCanon = D->getCanonicalDecl();
if (ExistingCanon != DCanon) {
assert(DCanon->getGlobalID() == Redecl.getFirstID() &&
"already merged this declaration");
// Have our redeclaration link point back at the canonical declaration
// of the existing declaration, so that this declaration has the
// appropriate canonical declaration.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(ExistingCanon);
D->First = ExistingCanon;
ExistingCanon->Used |= D->Used;
D->Used = false;
// When we merge a namespace, update its pointer to the first namespace.
// We cannot have loaded any redeclarations of this declaration yet, so
// there's nothing else that needs to be updated.
if (auto *Namespace = dyn_cast<NamespaceDecl>(D))
Namespace->AnonOrFirstNamespaceAndInline.setPointer(
assert_cast<NamespaceDecl*>(ExistingCanon));
// When we merge a template, merge its pattern.
if (auto *DTemplate = dyn_cast<RedeclarableTemplateDecl>(D))
mergeTemplatePattern(
DTemplate, assert_cast<RedeclarableTemplateDecl*>(ExistingCanon),
TemplatePatternID, Redecl.isKeyDecl());
// If this declaration is a key declaration, make a note of that.
if (Redecl.isKeyDecl())
Reader.KeyDecls[ExistingCanon].push_back(Redecl.getFirstID());
}
}
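// A sketch of the rewiring performed above: before merging, D heads its own
// chain; afterwards both chains share one canonical declaration, so
// getCanonicalDecl() on either chain yields ExistingCanon:
//
//   D->RedeclLink --> ExistingCanon (previous)
//   D->First      =   ExistingCanon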
/// \brief Attempts to merge the given declaration (D) with another declaration
/// of the same entity, for the case where the entity is not actually
/// redeclarable. This happens, for instance, when merging the fields of
/// identical class definitions from two different modules.
template<typename T>
void ASTDeclReader::mergeMergeable(Mergeable<T> *D) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
// ODR-based merging is only performed in C++. In C, identically-named things
// in different translation units are not redeclarations (but may still have
// compatible types).
if (!Reader.getContext().getLangOpts().CPlusPlus)
return;
if (FindExistingResult ExistingRes = findExisting(static_cast<T*>(D)))
if (T *Existing = ExistingRes)
Reader.getContext().setPrimaryMergedDecl(static_cast<T *>(D),
Existing->getCanonicalDecl());
}
void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
VisitDecl(D);
unsigned NumVars = D->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i) {
Vars.push_back(Record.readExpr());
}
D->setVars(Vars);
}
void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) {
VisitValueDecl(D);
D->setLocation(ReadSourceLocation());
D->setCombiner(Record.readExpr());
D->setInitializer(Record.readExpr());
D->PrevDeclInScope = ReadDeclID();
}
void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
VisitVarDecl(D);
}
//===----------------------------------------------------------------------===//
// Attribute Reading
//===----------------------------------------------------------------------===//
/// \brief Reads attributes from the current stream position.
void ASTReader::ReadAttributes(ASTRecordReader &Record, AttrVec &Attrs) {
for (unsigned i = 0, e = Record.readInt(); i != e; ++i) {
Attr *New = nullptr;
attr::Kind Kind = (attr::Kind)Record.readInt();
SourceRange Range = Record.readSourceRange();
ASTContext &Context = getContext();
#include "clang/Serialization/AttrPCHRead.inc"
assert(New && "Unable to decode attribute?");
Attrs.push_back(New);
}
}
//===----------------------------------------------------------------------===//
// ASTReader Implementation
//===----------------------------------------------------------------------===//
/// \brief Note that we have loaded the declaration with the given
/// Index.
///
/// This routine notes that this declaration has already been loaded,
/// so that future GetDecl calls will return this declaration rather
/// than trying to load a new declaration.
inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) {
assert(!DeclsLoaded[Index] && "Decl loaded twice?");
DeclsLoaded[Index] = D;
}
/// \brief Determine whether the consumer will be interested in seeing
/// this declaration (via HandleTopLevelDecl).
///
/// This routine should return true for anything that might affect
/// code generation, e.g., inline function definitions, Objective-C
/// declarations with metadata, etc.
static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) {
// An ObjCMethodDecl is never considered "interesting" because its
// implementation container always is.
// An ImportDecl or VarDecl imported from a module will get emitted when
// we import the relevant module.
if ((isa<ImportDecl>(D) || isa<VarDecl>(D)) && D->getImportedOwningModule() &&
Ctx.DeclMustBeEmitted(D))
return false;
if (isa<FileScopeAsmDecl>(D) ||
isa<ObjCProtocolDecl>(D) ||
isa<ObjCImplDecl>(D) ||
isa<ImportDecl>(D) ||
isa<PragmaCommentDecl>(D) ||
isa<PragmaDetectMismatchDecl>(D))
return true;
if (isa<OMPThreadPrivateDecl>(D) || isa<OMPDeclareReductionDecl>(D))
return !D->getDeclContext()->isFunctionOrMethod();
if (VarDecl *Var = dyn_cast<VarDecl>(D))
return Var->isFileVarDecl() &&
Var->isThisDeclarationADefinition() == VarDecl::Definition;
if (FunctionDecl *Func = dyn_cast<FunctionDecl>(D))
return Func->doesThisDeclarationHaveABody() || HasBody;
if (auto *ES = D->getASTContext().getExternalSource())
if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
return true;
return false;
}
/// \brief Get the correct cursor and offset for loading a declaration.
ASTReader::RecordLocation
ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
GlobalDeclMapType::iterator I = GlobalDeclMap.find(ID);
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
ModuleFile *M = I->second;
const DeclOffset &DOffs =
M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
Loc = TranslateSourceLocation(*M, DOffs.getLocation());
return RecordLocation(M, DOffs.BitOffset);
}
ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {
ContinuousRangeMap<uint64_t, ModuleFile*, 4>::iterator I
= GlobalBitOffsetsMap.find(GlobalOffset);
assert(I != GlobalBitOffsetsMap.end() && "Corrupted global bit offsets map");
return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset);
}
uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint32_t LocalOffset) {
return LocalOffset + M.GlobalBitOffset;
}
static bool isSameTemplateParameterList(const TemplateParameterList *X,
const TemplateParameterList *Y);
/// \brief Determine whether two template parameters are similar enough
/// that they may be used in declarations of the same template.
static bool isSameTemplateParameter(const NamedDecl *X,
const NamedDecl *Y) {
if (X->getKind() != Y->getKind())
return false;
if (const TemplateTypeParmDecl *TX = dyn_cast<TemplateTypeParmDecl>(X)) {
const TemplateTypeParmDecl *TY = cast<TemplateTypeParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack();
}
if (const NonTypeTemplateParmDecl *TX = dyn_cast<NonTypeTemplateParmDecl>(X)) {
const NonTypeTemplateParmDecl *TY = cast<NonTypeTemplateParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack() &&
TX->getASTContext().hasSameType(TX->getType(), TY->getType());
}
const TemplateTemplateParmDecl *TX = cast<TemplateTemplateParmDecl>(X);
const TemplateTemplateParmDecl *TY = cast<TemplateTemplateParmDecl>(Y);
return TX->isParameterPack() == TY->isParameterPack() &&
isSameTemplateParameterList(TX->getTemplateParameters(),
TY->getTemplateParameters());
}
static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) {
if (auto *NS = X->getAsNamespace())
return NS;
if (auto *NAS = X->getAsNamespaceAlias())
return NAS->getNamespace();
return nullptr;
}
static bool isSameQualifier(const NestedNameSpecifier *X,
const NestedNameSpecifier *Y) {
if (auto *NSX = getNamespace(X)) {
auto *NSY = getNamespace(Y);
if (!NSY || NSX->getCanonicalDecl() != NSY->getCanonicalDecl())
return false;
} else if (X->getKind() != Y->getKind())
return false;
// FIXME: For namespaces and types, we're permitted to check that the entity
// is named via the same tokens. We should probably do so.
switch (X->getKind()) {
case NestedNameSpecifier::Identifier:
if (X->getAsIdentifier() != Y->getAsIdentifier())
return false;
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
// We've already checked that we named the same namespace.
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
if (X->getAsType()->getCanonicalTypeInternal() !=
Y->getAsType()->getCanonicalTypeInternal())
return false;
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
return true;
}
// Recurse into earlier portion of NNS, if any.
auto *PX = X->getPrefix();
auto *PY = Y->getPrefix();
if (PX && PY)
return isSameQualifier(PX, PY);
return !PX && !PY;
}
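// Example of the namespace/alias equivalence handled above (hypothetical
// code):
//
//   namespace N { struct S; }
//   namespace M = N;
//
// The qualifiers 'N::' and 'M::' have different kinds (Namespace vs.
// NamespaceAlias) but name the same namespace, so getNamespace() lets them
// compare equal before the prefix recursion.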
/// \brief Determine whether two template parameter lists are similar enough
/// that they may be used in declarations of the same template.
static bool isSameTemplateParameterList(const TemplateParameterList *X,
const TemplateParameterList *Y) {
if (X->size() != Y->size())
return false;
for (unsigned I = 0, N = X->size(); I != N; ++I)
if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I)))
return false;
return true;
}
/// Determine whether the attributes we can overload on are identical for A and
/// B. Will ignore any overloadable attrs represented in the type of A and B.
static bool hasSameOverloadableAttrs(const FunctionDecl *A,
const FunctionDecl *B) {
// Note that pass_object_size attributes are represented in the function's
// ExtParameterInfo, so we don't need to check them here.
SmallVector<const EnableIfAttr *, 4> AEnableIfs;
// Since this is an equality check, we can ignore that enable_if attrs show up
// in reverse order.
for (const auto *EIA : A->specific_attrs<EnableIfAttr>())
AEnableIfs.push_back(EIA);
SmallVector<const EnableIfAttr *, 4> BEnableIfs;
for (const auto *EIA : B->specific_attrs<EnableIfAttr>())
BEnableIfs.push_back(EIA);
// Two very common cases: either we have 0 enable_if attrs, or we have an
// unequal number of enable_if attrs.
if (AEnableIfs.empty() && BEnableIfs.empty())
return true;
if (AEnableIfs.size() != BEnableIfs.size())
return false;
llvm::FoldingSetNodeID Cand1ID, Cand2ID;
for (unsigned I = 0, E = AEnableIfs.size(); I != E; ++I) {
Cand1ID.clear();
Cand2ID.clear();
AEnableIfs[I]->getCond()->Profile(Cand1ID, A->getASTContext(), true);
BEnableIfs[I]->getCond()->Profile(Cand2ID, B->getASTContext(), true);
if (Cand1ID != Cand2ID)
return false;
}
return true;
}
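// A sketch of what the pairwise Profile comparison above distinguishes, with
// hypothetical declarations:
//
//   void f(int x) __attribute__((enable_if(x > 0, "")));  // profile P1
//   void f(int x) __attribute__((enable_if(x < 0, "")));  // profile P2
//
// Two functions can match only if their enable_if conditions produce
// identical FoldingSetNodeIDs attribute by attribute, so P1 and P2 differ.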
/// \brief Determine whether the two declarations refer to the same entity.
static bool isSameEntity(NamedDecl *X, NamedDecl *Y) {
assert(X->getDeclName() == Y->getDeclName() && "Declaration name mismatch!");
if (X == Y)
return true;
// Must be in the same context.
if (!X->getDeclContext()->getRedeclContext()->Equals(
Y->getDeclContext()->getRedeclContext()))
return false;
// Two typedefs refer to the same entity if they have the same underlying
// type.
if (TypedefNameDecl *TypedefX = dyn_cast<TypedefNameDecl>(X))
if (TypedefNameDecl *TypedefY = dyn_cast<TypedefNameDecl>(Y))
return X->getASTContext().hasSameType(TypedefX->getUnderlyingType(),
TypedefY->getUnderlyingType());
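// Note that this check runs before the kind comparison below, so a
// 'typedef int T;' in one module and a 'using T = int;' in another are the
// same entity: both are TypedefNameDecls with the same underlying type.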
// Must have the same kind.
if (X->getKind() != Y->getKind())
return false;
// Objective-C classes and protocols with the same name always match.
if (isa<ObjCInterfaceDecl>(X) || isa<ObjCProtocolDecl>(X))
return true;
if (isa<ClassTemplateSpecializationDecl>(X)) {
// No need to handle these here: we merge them when adding them to the
// template.
return false;
}
// Compatible tags match.
if (TagDecl *TagX = dyn_cast<TagDecl>(X)) {
TagDecl *TagY = cast<TagDecl>(Y);
return (TagX->getTagKind() == TagY->getTagKind()) ||
((TagX->getTagKind() == TTK_Struct || TagX->getTagKind() == TTK_Class ||
TagX->getTagKind() == TTK_Interface) &&
(TagY->getTagKind() == TTK_Struct || TagY->getTagKind() == TTK_Class ||
TagY->getTagKind() == TTK_Interface));
}
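// For instance, a 'struct S;' from one module and a 'class S;' from another
// are treated as the same entity here, since struct, class, and __interface
// are interchangeable tag kinds for this purpose.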
// Functions with the same type and linkage match.
// FIXME: This needs to cope with merging of prototyped/non-prototyped
// functions, etc.
if (FunctionDecl *FuncX = dyn_cast<FunctionDecl>(X)) {
FunctionDecl *FuncY = cast<FunctionDecl>(Y);
if (CXXConstructorDecl *CtorX = dyn_cast<CXXConstructorDecl>(X)) {
CXXConstructorDecl *CtorY = cast<CXXConstructorDecl>(Y);
if (CtorX->getInheritedConstructor() &&
!isSameEntity(CtorX->getInheritedConstructor().getConstructor(),
CtorY->getInheritedConstructor().getConstructor()))
return false;
}
ASTContext &C = FuncX->getASTContext();
if (!C.hasSameType(FuncX->getType(), FuncY->getType())) {
// We can get functions with different types on the redecl chain in C++17
// if they have differing exception specifications and at least one of
// the exception specs is unresolved.
// FIXME: Do we need to check for C++14 deduced return types here too?
auto *XFPT = FuncX->getType()->getAs<FunctionProtoType>();
auto *YFPT = FuncY->getType()->getAs<FunctionProtoType>();
if (C.getLangOpts().CPlusPlus1z && XFPT && YFPT &&
(isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
C.hasSameFunctionTypeIgnoringExceptionSpec(FuncX->getType(),
FuncY->getType()))
return true;
return false;
}
return FuncX->getLinkageInternal() == FuncY->getLinkageInternal() &&
hasSameOverloadableAttrs(FuncX, FuncY);
}
// Variables with the same type and linkage match.
if (VarDecl *VarX = dyn_cast<VarDecl>(X)) {
VarDecl *VarY = cast<VarDecl>(Y);
if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) {
ASTContext &C = VarX->getASTContext();
if (C.hasSameType(VarX->getType(), VarY->getType()))
return true;
// We can get decls with different types on the redecl chain. E.g.:
// template <typename T> struct S { static T Var[]; }; // #1
// template <typename T> T S<T>::Var[sizeof(T)]; // #2
// This should only happen when completing an incomplete array type. In that
// case, when comparing #1 and #2, we should compare their element types.
const ArrayType *VarXTy = C.getAsArrayType(VarX->getType());
const ArrayType *VarYTy = C.getAsArrayType(VarY->getType());
if (!VarXTy || !VarYTy)
return false;
if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType())
return C.hasSameType(VarXTy->getElementType(), VarYTy->getElementType());
}
return false;
}
// Namespaces with the same name and inlinedness match.
if (NamespaceDecl *NamespaceX = dyn_cast<NamespaceDecl>(X)) {
NamespaceDecl *NamespaceY = cast<NamespaceDecl>(Y);
return NamespaceX->isInline() == NamespaceY->isInline();
}
// Templates with the same name and kind match if their template parameter
// lists and patterns match.
if (TemplateDecl *TemplateX = dyn_cast<TemplateDecl>(X)) {
TemplateDecl *TemplateY = cast<TemplateDecl>(Y);
return isSameEntity(TemplateX->getTemplatedDecl(),
TemplateY->getTemplatedDecl()) &&
isSameTemplateParameterList(TemplateX->getTemplateParameters(),
TemplateY->getTemplateParameters());
}
// Fields with the same name and the same type match.
if (FieldDecl *FDX = dyn_cast<FieldDecl>(X)) {
FieldDecl *FDY = cast<FieldDecl>(Y);
// FIXME: Also check the bitwidth is odr-equivalent, if any.
return X->getASTContext().hasSameType(FDX->getType(), FDY->getType());
}
// Indirect fields with the same target field match.
if (auto *IFDX = dyn_cast<IndirectFieldDecl>(X)) {
auto *IFDY = cast<IndirectFieldDecl>(Y);
return IFDX->getAnonField()->getCanonicalDecl() ==
IFDY->getAnonField()->getCanonicalDecl();
}
// Enumerators with the same name match.
if (isa<EnumConstantDecl>(X))
// FIXME: Also check the value is odr-equivalent.
return true;
// Using shadow declarations with the same target match.
if (UsingShadowDecl *USX = dyn_cast<UsingShadowDecl>(X)) {
UsingShadowDecl *USY = cast<UsingShadowDecl>(Y);
return USX->getTargetDecl() == USY->getTargetDecl();
}
// Using declarations with the same qualifier match. (We already know that
// the name matches.)
if (auto *UX = dyn_cast<UsingDecl>(X)) {
auto *UY = cast<UsingDecl>(Y);
return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
UX->hasTypename() == UY->hasTypename() &&
UX->isAccessDeclaration() == UY->isAccessDeclaration();
}
if (auto *UX = dyn_cast<UnresolvedUsingValueDecl>(X)) {
auto *UY = cast<UnresolvedUsingValueDecl>(Y);
return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
UX->isAccessDeclaration() == UY->isAccessDeclaration();
}
if (auto *UX = dyn_cast<UnresolvedUsingTypenameDecl>(X))
return isSameQualifier(
UX->getQualifier(),
cast<UnresolvedUsingTypenameDecl>(Y)->getQualifier());
// Namespace alias definitions with the same target match.
if (auto *NAX = dyn_cast<NamespaceAliasDecl>(X)) {
auto *NAY = cast<NamespaceAliasDecl>(Y);
return NAX->getNamespace()->Equals(NAY->getNamespace());
}
return false;
}
/// Find the context in which we should search for previous declarations when
/// looking for declarations to merge.
DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
DeclContext *DC) {
if (NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC))
return ND->getOriginalNamespace();
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(DC)) {
// Try to dig out the definition.
auto *DD = RD->DefinitionData;
if (!DD)
DD = RD->getCanonicalDecl()->DefinitionData;
// If there's no definition yet, then DC's definition is added by an update
// record, but we've not yet loaded that update record. In this case, we
// commit to DC being the canonical definition now, and will fix this when
// we load the update record.
if (!DD) {
DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD);
RD->IsCompleteDefinition = true;
RD->DefinitionData = DD;
RD->getCanonicalDecl()->DefinitionData = DD;
// Track that we did this horrible thing so that we can fix it later.
Reader.PendingFakeDefinitionData.insert(
std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake));
}
return DD->Definition;
}
if (EnumDecl *ED = dyn_cast<EnumDecl>(DC))
return ED->getASTContext().getLangOpts().CPlusPlus? ED->getDefinition()
: nullptr;
// We can see the TU here only if we have no Sema object. In that case,
// there's no TU scope to look in, so using the DC alone is sufficient.
if (auto *TU = dyn_cast<TranslationUnitDecl>(DC))
return TU;
return nullptr;
}
ASTDeclReader::FindExistingResult::~FindExistingResult() {
// Record that we had a typedef name for linkage whether or not we merge
// with that declaration.
if (TypedefNameForLinkage) {
DeclContext *DC = New->getDeclContext()->getRedeclContext();
Reader.ImportedTypedefNamesForLinkage.insert(
std::make_pair(std::make_pair(DC, TypedefNameForLinkage), New));
return;
}
if (!AddResult || Existing)
return;
DeclarationName Name = New->getDeclName();
DeclContext *DC = New->getDeclContext()->getRedeclContext();
if (needsAnonymousDeclarationNumber(New)) {
setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(),
AnonymousDeclNumber, New);
} else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name))
Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()]
.push_back(New);
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
// Add the declaration to its redeclaration context so later merging
// lookups will find it.
MergeDC->makeDeclVisibleInContextImpl(New, /*Internal*/true);
}
}
/// Find the declaration that should be merged into, given the declaration found
/// by name lookup. If we're merging an anonymous declaration within a typedef,
/// we need a matching typedef, and we merge with the type inside it.
static NamedDecl *getDeclForMerging(NamedDecl *Found,
bool IsTypedefNameForLinkage) {
if (!IsTypedefNameForLinkage)
return Found;
// If we found a typedef declaration that gives a name to some other
// declaration, then we want that inner declaration. Declarations from
// AST files are handled via ImportedTypedefNamesForLinkage.
if (Found->isFromASTFile())
return nullptr;
if (auto *TND = dyn_cast<TypedefNameDecl>(Found))
return TND->getAnonDeclWithTypedefName(/*AnyRedecl*/true);
return nullptr;
}
NamedDecl *ASTDeclReader::getAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC,
unsigned Index) {
// If the lexical context has been merged, look into the now-canonical
// definition.
if (auto *Merged = Reader.MergedDeclContexts.lookup(DC))
DC = Merged;
// If we've seen this before, return the canonical declaration.
auto &Previous = Reader.AnonymousDeclarationsForMerging[DC];
if (Index < Previous.size() && Previous[Index])
return Previous[Index];
// If this is the first time, but we have parsed a declaration of the context,
// build the anonymous declaration list from the parsed declaration.
if (!cast<Decl>(DC)->isFromASTFile()) {
numberAnonymousDeclsWithin(DC, [&](NamedDecl *ND, unsigned Number) {
if (Previous.size() == Number)
Previous.push_back(cast<NamedDecl>(ND->getCanonicalDecl()));
else
Previous[Number] = cast<NamedDecl>(ND->getCanonicalDecl());
});
}
return Index < Previous.size() ? Previous[Index] : nullptr;
}
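// A sketch of the numbering this relies on (assumed from the lambda above,
// which receives consecutive Number values): anonymous declarations get
// indices in order of occurrence within their context, so the same index
// computed for two copies of one context identifies the same entity.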
void ASTDeclReader::setAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC, unsigned Index,
NamedDecl *D) {
if (auto *Merged = Reader.MergedDeclContexts.lookup(DC))
DC = Merged;
auto &Previous = Reader.AnonymousDeclarationsForMerging[DC];
if (Index >= Previous.size())
Previous.resize(Index + 1);
if (!Previous[Index])
Previous[Index] = D;
}
ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
DeclarationName Name = TypedefNameForLinkage ? TypedefNameForLinkage
: D->getDeclName();
if (!Name && !needsAnonymousDeclarationNumber(D)) {
// Don't bother trying to find unnamed declarations that are in
// unmergeable contexts.
FindExistingResult Result(Reader, D, /*Existing=*/nullptr,
AnonymousDeclNumber, TypedefNameForLinkage);
Result.suppress();
return Result;
}
DeclContext *DC = D->getDeclContext()->getRedeclContext();
if (TypedefNameForLinkage) {
auto It = Reader.ImportedTypedefNamesForLinkage.find(
std::make_pair(DC, TypedefNameForLinkage));
if (It != Reader.ImportedTypedefNamesForLinkage.end())
if (isSameEntity(It->second, D))
return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber,
TypedefNameForLinkage);
// Go on to check in other places in case an existing typedef name
// was not imported.
}
if (needsAnonymousDeclarationNumber(D)) {
// This is an anonymous declaration that we may need to merge. Look it up
// in its context by number.
if (auto *Existing = getAnonymousDeclForMerging(
Reader, D->getLexicalDeclContext(), AnonymousDeclNumber))
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
} else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
IdentifierResolver &IdResolver = Reader.getIdResolver();
// Temporarily consider the identifier to be up-to-date. We don't want to
// cause additional lookups here.
class UpToDateIdentifierRAII {
IdentifierInfo *II;
bool WasOutOfDate;
public:
explicit UpToDateIdentifierRAII(IdentifierInfo *II)
: II(II), WasOutOfDate(false)
{
if (II) {
WasOutOfDate = II->isOutOfDate();
if (WasOutOfDate)
II->setOutOfDate(false);
}
}
~UpToDateIdentifierRAII() {
if (WasOutOfDate)
II->setOutOfDate(true);
}
} UpToDate(Name.getAsIdentifierInfo());
for (IdentifierResolver::iterator I = IdResolver.begin(Name),
IEnd = IdResolver.end();
I != IEnd; ++I) {
if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
}
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
DeclContext::lookup_result R = MergeDC->noload_lookup(Name);
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
}
} else {
// Not in a mergeable context.
return FindExistingResult(Reader);
}
// If this declaration is from a merged context, make a note that we need to
// check that the canonical definition of that context contains the decl.
//
// FIXME: We should do something similar if we merge two definitions of the
// same template specialization into the same CXXRecordDecl.
auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext());
if (MergedDCIt != Reader.MergedDeclContexts.end() &&
MergedDCIt->second == D->getDeclContext())
Reader.PendingOdrMergeChecks.push_back(D);
return FindExistingResult(Reader, D, /*Existing=*/nullptr,
AnonymousDeclNumber, TypedefNameForLinkage);
}
template<typename DeclT>
Decl *ASTDeclReader::getMostRecentDeclImpl(Redeclarable<DeclT> *D) {
return D->RedeclLink.getLatestNotUpdated();
}
Decl *ASTDeclReader::getMostRecentDeclImpl(...) {
llvm_unreachable("getMostRecentDecl on non-redeclarable declaration");
}
Decl *ASTDeclReader::getMostRecentDecl(Decl *D) {
assert(D);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
return getMostRecentDeclImpl(cast<TYPE##Decl>(D));
#include "clang/AST/DeclNodes.inc"
}
llvm_unreachable("unknown decl kind");
}
Decl *ASTReader::getMostRecentExistingDecl(Decl *D) {
return ASTDeclReader::getMostRecentDecl(D->getCanonicalDecl());
}
template<typename DeclT>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D,
Decl *Previous, Decl *Canon) {
D->RedeclLink.setPrevious(cast<DeclT>(Previous));
D->First = cast<DeclT>(Previous)->First;
}
namespace clang {
template<>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<VarDecl> *D,
Decl *Previous, Decl *Canon) {
VarDecl *VD = static_cast<VarDecl*>(D);
VarDecl *PrevVD = cast<VarDecl>(Previous);
D->RedeclLink.setPrevious(PrevVD);
D->First = PrevVD->First;
// We should keep at most one definition on the chain.
// FIXME: Cache the definition once we've found it. Building a chain with
// N definitions currently takes O(N^2) time here.
if (VD->isThisDeclarationADefinition() == VarDecl::Definition) {
for (VarDecl *CurD = PrevVD; CurD; CurD = CurD->getPreviousDecl()) {
if (CurD->isThisDeclarationADefinition() == VarDecl::Definition) {
Reader.mergeDefinitionVisibility(CurD, VD);
VD->demoteThisDefinitionToDeclaration();
break;
}
}
}
}
template<>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<FunctionDecl> *D,
Decl *Previous, Decl *Canon) {
FunctionDecl *FD = static_cast<FunctionDecl*>(D);
FunctionDecl *PrevFD = cast<FunctionDecl>(Previous);
FD->RedeclLink.setPrevious(PrevFD);
FD->First = PrevFD->First;
// If the previous declaration is an inline function declaration, then this
// declaration is too.
if (PrevFD->IsInline != FD->IsInline) {
// FIXME: [dcl.fct.spec]p4:
// If a function with external linkage is declared inline in one
// translation unit, it shall be declared inline in all translation
// units in which it appears.
//
// Be careful of this case:
//
// module A:
// template<typename T> struct X { void f(); };
// template<typename T> inline void X<T>::f() {}
//
// module B instantiates the declaration of X<int>::f
// module C instantiates the definition of X<int>::f
//
// If module B and C are merged, we do not have a violation of this rule.
FD->IsInline = true;
}
// If we need to propagate an exception specification along the redecl
// chain, make a note of that so that we can do so later.
auto *FPT = FD->getType()->getAs<FunctionProtoType>();
auto *PrevFPT = PrevFD->getType()->getAs<FunctionProtoType>();
if (FPT && PrevFPT) {
bool IsUnresolved = isUnresolvedExceptionSpec(FPT->getExceptionSpecType());
bool WasUnresolved =
isUnresolvedExceptionSpec(PrevFPT->getExceptionSpecType());
if (IsUnresolved != WasUnresolved)
Reader.PendingExceptionSpecUpdates.insert(
std::make_pair(Canon, IsUnresolved ? PrevFD : FD));
}
}
} // end namespace clang
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) {
llvm_unreachable("attachPreviousDecl on non-redeclarable declaration");
}
/// Inherit the default template argument from \p From to \p To. Returns
/// \c false if there is no default template argument for \p From.
template <typename ParmDecl>
static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From,
Decl *ToD) {
auto *To = cast<ParmDecl>(ToD);
if (!From->hasDefaultArgument())
return false;
To->setInheritedDefaultArgument(Context, From);
return true;
}
static void inheritDefaultTemplateArguments(ASTContext &Context,
TemplateDecl *From,
TemplateDecl *To) {
auto *FromTP = From->getTemplateParameters();
auto *ToTP = To->getTemplateParameters();
assert(FromTP->size() == ToTP->size() && "merged mismatched templates?");
for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
NamedDecl *FromParam = FromTP->getParam(N - I - 1);
if (FromParam->isParameterPack())
continue;
NamedDecl *ToParam = ToTP->getParam(N - I - 1);
if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam)) {
if (!inheritDefaultTemplateArgument(Context, FTTP, ToParam))
break;
} else if (auto *FNTTP = dyn_cast<NonTypeTemplateParmDecl>(FromParam)) {
if (!inheritDefaultTemplateArgument(Context, FNTTP, ToParam))
break;
} else {
if (!inheritDefaultTemplateArgument(
Context, cast<TemplateTemplateParmDecl>(FromParam), ToParam))
break;
}
}
}
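// Example of the inheritance performed above (hypothetical modules):
//
//   template<typename T = int> struct X;  // module A (From)
//   template<typename T> struct X { };    // module B (To)
//
// B's parameter T inherits the '= int' default from A. The loop walks the
// parameters from the back and stops at the first one without a default,
// matching the rule that default template arguments must be trailing.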
void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
Decl *Previous, Decl *Canon) {
assert(D && Previous);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
attachPreviousDeclImpl(Reader, cast<TYPE##Decl>(D), Previous, Canon); \
break;
#include "clang/AST/DeclNodes.inc"
}
// If the declaration was visible in one module, a redeclaration of it in
// another module remains visible even if it wouldn't be visible by itself.
//
// FIXME: In this case, the declaration should only be visible if a module
// that makes it visible has been imported.
D->IdentifierNamespace |=
Previous->IdentifierNamespace &
(Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type);
// If the declaration declares a template, it may inherit default arguments
// from the previous declaration.
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
inheritDefaultTemplateArguments(Reader.getContext(),
cast<TemplateDecl>(Previous), TD);
}
template<typename DeclT>
void ASTDeclReader::attachLatestDeclImpl(Redeclarable<DeclT> *D, Decl *Latest) {
D->RedeclLink.setLatest(cast<DeclT>(Latest));
}
void ASTDeclReader::attachLatestDeclImpl(...) {
llvm_unreachable("attachLatestDecl on non-redeclarable declaration");
}
void ASTDeclReader::attachLatestDecl(Decl *D, Decl *Latest) {
assert(D && Latest);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
attachLatestDeclImpl(cast<TYPE##Decl>(D), Latest); \
break;
#include "clang/AST/DeclNodes.inc"
}
}
template<typename DeclT>
void ASTDeclReader::markIncompleteDeclChainImpl(Redeclarable<DeclT> *D) {
D->RedeclLink.markIncomplete();
}
void ASTDeclReader::markIncompleteDeclChainImpl(...) {
llvm_unreachable("markIncompleteDeclChain on non-redeclarable declaration");
}
void ASTReader::markIncompleteDeclChain(Decl *D) {
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
ASTDeclReader::markIncompleteDeclChainImpl(cast<TYPE##Decl>(D)); \
break;
#include "clang/AST/DeclNodes.inc"
}
}
/// \brief Read the declaration at the given offset from the AST file.
Decl *ASTReader::ReadDeclRecord(DeclID ID) {
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
SourceLocation DeclLoc;
RecordLocation Loc = DeclCursorForID(ID, DeclLoc);
llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this declaration.
SavedStreamPosition SavedPosition(DeclsCursor);
ReadingKindTracker ReadingKind(Read_Decl, *this);
// Note that we are loading a declaration record.
Deserializing ADecl(this);
DeclsCursor.JumpToBit(Loc.Offset);
ASTRecordReader Record(*this, *Loc.F);
ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc);
unsigned Code = DeclsCursor.ReadCode();
ASTContext &Context = getContext();
Decl *D = nullptr;
switch ((DeclCode)Record.readRecord(DeclsCursor, Code)) {
case DECL_CONTEXT_LEXICAL:
case DECL_CONTEXT_VISIBLE:
llvm_unreachable("Record cannot be de-serialized with ReadDeclRecord");
case DECL_TYPEDEF:
D = TypedefDecl::CreateDeserialized(Context, ID);
break;
case DECL_TYPEALIAS:
D = TypeAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_ENUM:
D = EnumDecl::CreateDeserialized(Context, ID);
break;
case DECL_RECORD:
D = RecordDecl::CreateDeserialized(Context, ID);
break;
case DECL_ENUM_CONSTANT:
D = EnumConstantDecl::CreateDeserialized(Context, ID);
break;
case DECL_FUNCTION:
D = FunctionDecl::CreateDeserialized(Context, ID);
break;
case DECL_LINKAGE_SPEC:
D = LinkageSpecDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPORT:
D = ExportDecl::CreateDeserialized(Context, ID);
break;
case DECL_LABEL:
D = LabelDecl::CreateDeserialized(Context, ID);
break;
case DECL_NAMESPACE:
D = NamespaceDecl::CreateDeserialized(Context, ID);
break;
case DECL_NAMESPACE_ALIAS:
D = NamespaceAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING:
D = UsingDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_PACK:
D = UsingPackDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_USING_SHADOW:
D = UsingShadowDecl::CreateDeserialized(Context, ID);
break;
case DECL_CONSTRUCTOR_USING_SHADOW:
D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_DIRECTIVE:
D = UsingDirectiveDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_VALUE:
D = UnresolvedUsingValueDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_TYPENAME:
D = UnresolvedUsingTypenameDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_RECORD:
D = CXXRecordDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_DEDUCTION_GUIDE:
D = CXXDeductionGuideDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_METHOD:
D = CXXMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONSTRUCTOR:
D = CXXConstructorDecl::CreateDeserialized(Context, ID, false);
break;
case DECL_CXX_INHERITED_CONSTRUCTOR:
D = CXXConstructorDecl::CreateDeserialized(Context, ID, true);
break;
case DECL_CXX_DESTRUCTOR:
D = CXXDestructorDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONVERSION:
D = CXXConversionDecl::CreateDeserialized(Context, ID);
break;
case DECL_ACCESS_SPEC:
D = AccessSpecDecl::CreateDeserialized(Context, ID);
break;
case DECL_FRIEND:
D = FriendDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_FRIEND_TEMPLATE:
D = FriendTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE:
D = ClassTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE_SPECIALIZATION:
D = ClassTemplateSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION:
D = ClassTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE:
D = VarTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE_SPECIALIZATION:
D = VarTemplateSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION:
D = VarTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION:
D = ClassScopeFunctionSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_FUNCTION_TEMPLATE:
D = FunctionTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_TEMPLATE_TYPE_PARM:
D = TemplateTypeParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_NON_TYPE_TEMPLATE_PARM:
D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK:
D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_TEMPLATE_TEMPLATE_PARM:
D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK:
D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_TYPE_ALIAS_TEMPLATE:
D = TypeAliasTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_STATIC_ASSERT:
D = StaticAssertDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_METHOD:
D = ObjCMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_INTERFACE:
D = ObjCInterfaceDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_IVAR:
D = ObjCIvarDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROTOCOL:
D = ObjCProtocolDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_AT_DEFS_FIELD:
D = ObjCAtDefsFieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_CATEGORY:
D = ObjCCategoryDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_CATEGORY_IMPL:
D = ObjCCategoryImplDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_IMPLEMENTATION:
D = ObjCImplementationDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_COMPATIBLE_ALIAS:
D = ObjCCompatibleAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROPERTY:
D = ObjCPropertyDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROPERTY_IMPL:
D = ObjCPropertyImplDecl::CreateDeserialized(Context, ID);
break;
case DECL_FIELD:
D = FieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_INDIRECTFIELD:
D = IndirectFieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR:
D = VarDecl::CreateDeserialized(Context, ID);
break;
case DECL_IMPLICIT_PARAM:
D = ImplicitParamDecl::CreateDeserialized(Context, ID);
break;
case DECL_PARM_VAR:
D = ParmVarDecl::CreateDeserialized(Context, ID);
break;
case DECL_DECOMPOSITION:
D = DecompositionDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_BINDING:
D = BindingDecl::CreateDeserialized(Context, ID);
break;
case DECL_FILE_SCOPE_ASM:
D = FileScopeAsmDecl::CreateDeserialized(Context, ID);
break;
case DECL_BLOCK:
D = BlockDecl::CreateDeserialized(Context, ID);
break;
case DECL_MS_PROPERTY:
D = MSPropertyDecl::CreateDeserialized(Context, ID);
break;
case DECL_CAPTURED:
D = CapturedDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_CXX_BASE_SPECIFIERS:
Error("attempt to read a C++ base-specifier record as a declaration");
return nullptr;
case DECL_CXX_CTOR_INITIALIZERS:
Error("attempt to read a C++ ctor initializer record as a declaration");
return nullptr;
case DECL_IMPORT:
// Note: last entry of the ImportDecl record is the number of stored source
// locations.
D = ImportDecl::CreateDeserialized(Context, ID, Record.back());
break;
case DECL_OMP_THREADPRIVATE:
D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_OMP_DECLARE_REDUCTION:
D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID);
break;
case DECL_OMP_CAPTUREDEXPR:
D = OMPCapturedExprDecl::CreateDeserialized(Context, ID);
break;
case DECL_PRAGMA_COMMENT:
D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_PRAGMA_DETECT_MISMATCH:
D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_EMPTY:
D = EmptyDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_TYPE_PARAM:
D = ObjCTypeParamDecl::CreateDeserialized(Context, ID);
break;
}
assert(D && "Unknown declaration reading AST file");
LoadedDecl(Index, D);
// Set the DeclContext before doing any deserialization, to make sure internal
// calls to Decl::getASTContext() by Decl's methods will find the
// TranslationUnitDecl without crashing.
D->setDeclContext(Context.getTranslationUnitDecl());
Reader.Visit(D);
// If this declaration is also a declaration context, get the
// offsets for its tables of lexical and visible declarations.
if (DeclContext *DC = dyn_cast<DeclContext>(D)) {
std::pair<uint64_t, uint64_t> Offsets = Reader.VisitDeclContext(DC);
if (Offsets.first &&
ReadLexicalDeclContextStorage(*Loc.F, DeclsCursor, Offsets.first, DC))
return nullptr;
if (Offsets.second &&
ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID))
return nullptr;
}
assert(Record.getIdx() == Record.size());
// Load any relevant update records.
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/true));
// Load the categories after recursive loading is finished.
if (ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(D))
// If we already have a definition when deserializing the ObjCInterfaceDecl,
// we put the Decl in PendingDefinitions so we can pull the categories here.
if (Class->isThisDeclarationADefinition() ||
PendingDefinitions.count(Class))
loadObjCCategories(ID, Class);
// If we have deserialized a declaration that has a definition the
// AST consumer might need to know about, queue it.
// We don't pass it to the consumer immediately because we may be in recursive
// loading, and some declarations may still be initializing.
PotentiallyInterestingDecls.push_back(
InterestingDecl(D, Reader.hasPendingBody()));
return D;
}
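/// Drain the set of declarations that may interest the AST consumer: first
/// force deserialization of all eagerly-deserialized decls, then hand each
/// queued decl to the consumer if it is still deemed interesting.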
void ASTReader::PassInterestingDeclsToConsumer() {
assert(Consumer);
if (PassingDeclsToConsumer)
return;
// Guard variable to avoid recursively redoing the process of passing
// decls to the consumer.
SaveAndRestore<bool> GuardPassingDeclsToConsumer(PassingDeclsToConsumer,
true);
// Ensure that we've loaded all potentially-interesting declarations
// that need to be eagerly loaded.
for (auto ID : EagerlyDeserializedDecls)
GetDecl(ID);
EagerlyDeserializedDecls.clear();
while (!PotentiallyInterestingDecls.empty()) {
InterestingDecl D = PotentiallyInterestingDecls.front();
PotentiallyInterestingDecls.pop_front();
if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody()))
PassInterestingDeclToConsumer(D.getDecl());
}
}
void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
// The declaration may have been modified by files later in the chain.
// If this is the case, read the record containing the updates from each file
// and pass it to ASTDeclReader to make the modifications.
serialization::GlobalDeclID ID = Record.ID;
Decl *D = Record.D;
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID);
llvm::SmallVector<serialization::DeclID, 8> PendingLazySpecializationIDs;
if (UpdI != DeclUpdateOffsets.end()) {
auto UpdateOffsets = std::move(UpdI->second);
DeclUpdateOffsets.erase(UpdI);
// Check if this decl was interesting to the consumer. If we just loaded
// the declaration, then we know it was interesting and we skip the call
// to isConsumerInterestedIn because it is unsafe to call in the
// current ASTReader state.
bool WasInteresting =
Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false);
for (auto &FileAndOffset : UpdateOffsets) {
ModuleFile *F = FileAndOffset.first;
uint64_t Offset = FileAndOffset.second;
llvm::BitstreamCursor &Cursor = F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
Cursor.JumpToBit(Offset);
unsigned Code = Cursor.ReadCode();
ASTRecordReader Record(*this, *F);
unsigned RecCode = Record.readRecord(Cursor, Code);
(void)RecCode;
assert(RecCode == DECL_UPDATES && "Expected DECL_UPDATES record!");
ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID,
SourceLocation());
Reader.UpdateDecl(D, PendingLazySpecializationIDs);
// We might have made this declaration interesting. If so, remember that
// we need to hand it off to the consumer.
if (!WasInteresting &&
isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) {
PotentiallyInterestingDecls.push_back(
InterestingDecl(D, Reader.hasPendingBody()));
WasInteresting = true;
}
}
}
// Add the lazy specializations to the template.
assert((PendingLazySpecializationIDs.empty() || isa<ClassTemplateDecl>(D) ||
isa<FunctionTemplateDecl>(D) || isa<VarTemplateDecl>(D)) &&
"Must not have pending specializations");
if (auto *CTD = dyn_cast<ClassTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs);
else if (auto *FTD = dyn_cast<FunctionTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs);
else if (auto *VTD = dyn_cast<VarTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs);
PendingLazySpecializationIDs.clear();
// Load the pending visible updates for this decl context, if it has any.
auto I = PendingVisibleUpdates.find(ID);
if (I != PendingVisibleUpdates.end()) {
auto VisibleUpdates = std::move(I->second);
PendingVisibleUpdates.erase(I);
auto *DC = cast<DeclContext>(D)->getPrimaryContext();
for (const PendingVisibleUpdate &Update : VisibleUpdates)
Lookups[DC].Table.add(
Update.Mod, Update.Data,
reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
DC->setHasExternalVisibleStorage(true);
}
}
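/// Complete the redeclaration chain rooted at FirstLocal's canonical decl:
/// attach FirstLocal after the most recent known declaration and, when
/// LocalOffset is non-zero, read this module file's LOCAL_REDECLARATIONS
/// record to attach the remaining redeclarations.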
void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
// Attach FirstLocal to the end of the decl chain.
Decl *CanonDecl = FirstLocal->getCanonicalDecl();
if (FirstLocal != CanonDecl) {
Decl *PrevMostRecent = ASTDeclReader::getMostRecentDecl(CanonDecl);
ASTDeclReader::attachPreviousDecl(
*this, FirstLocal, PrevMostRecent ? PrevMostRecent : CanonDecl,
CanonDecl);
}
if (!LocalOffset) {
ASTDeclReader::attachLatestDecl(CanonDecl, FirstLocal);
return;
}
// Load the list of other redeclarations from this module file.
ModuleFile *M = getOwningModuleFile(FirstLocal);
assert(M && "imported decl from no module file");
llvm::BitstreamCursor &Cursor = M->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
Cursor.JumpToBit(LocalOffset);
RecordData Record;
unsigned Code = Cursor.ReadCode();
unsigned RecCode = Cursor.readRecord(Code, Record);
(void)RecCode;
assert(RecCode == LOCAL_REDECLARATIONS && "expected LOCAL_REDECLARATIONS record!");
// FIXME: We have several different dispatches on decl kind here; maybe
// we should instead generate one loop per kind and dispatch up-front?
Decl *MostRecent = FirstLocal;
for (unsigned I = 0, N = Record.size(); I != N; ++I) {
auto *D = GetLocalDecl(*M, Record[N - I - 1]);
ASTDeclReader::attachPreviousDecl(*this, D, MostRecent, CanonDecl);
MostRecent = D;
}
ASTDeclReader::attachLatestDecl(CanonDecl, MostRecent);
}
namespace {
/// \brief Given an ObjC interface, walks the loaded module files and links
/// all of the interface's categories to it.
class ObjCCategoriesVisitor {
ASTReader &Reader;
ObjCInterfaceDecl *Interface;
llvm::SmallPtrSetImpl<ObjCCategoryDecl *> &Deserialized;
ObjCCategoryDecl *Tail;
llvm::DenseMap<DeclarationName, ObjCCategoryDecl *> NameCategoryMap;
serialization::GlobalDeclID InterfaceID;
unsigned PreviousGeneration;
void add(ObjCCategoryDecl *Cat) {
// Only process each category once.
if (!Deserialized.erase(Cat))
return;
// Check for duplicate categories.
if (Cat->getDeclName()) {
ObjCCategoryDecl *&Existing = NameCategoryMap[Cat->getDeclName()];
if (Existing &&
Reader.getOwningModuleFile(Existing)
!= Reader.getOwningModuleFile(Cat)) {
// FIXME: We should not warn for duplicates in diamond:
//
//   MT     //
//  /  \    //
// ML   MR  //
//  \  /    //
//   MB     //
//
// If there are duplicates in ML/MR, there will be a warning when
// creating MB *and* when importing MB. We should not warn when
// importing.
Reader.Diag(Cat->getLocation(), diag::warn_dup_category_def)
<< Interface->getDeclName() << Cat->getDeclName();
Reader.Diag(Existing->getLocation(), diag::note_previous_definition);
} else if (!Existing) {
// Record this category.
Existing = Cat;
}
}
// Add this category to the end of the chain.
if (Tail)
ASTDeclReader::setNextObjCCategory(Tail, Cat);
else
Interface->setCategoryListRaw(Cat);
Tail = Cat;
}
public:
ObjCCategoriesVisitor(ASTReader &Reader,
ObjCInterfaceDecl *Interface,
llvm::SmallPtrSetImpl<ObjCCategoryDecl *> &Deserialized,
serialization::GlobalDeclID InterfaceID,
unsigned PreviousGeneration)
: Reader(Reader), Interface(Interface), Deserialized(Deserialized),
Tail(nullptr), InterfaceID(InterfaceID),
PreviousGeneration(PreviousGeneration)
{
// Populate the name -> category map with the set of known categories.
for (auto *Cat : Interface->known_categories()) {
if (Cat->getDeclName())
NameCategoryMap[Cat->getDeclName()] = Cat;
// Keep track of the tail of the category list.
Tail = Cat;
}
}
bool operator()(ModuleFile &M) {
// If we've loaded all of the category information we care about from
// this module file, we're done.
if (M.Generation <= PreviousGeneration)
return true;
// Map global ID of the definition down to the local ID used in this
// module file. If there is no such mapping, we'll find nothing here
// (or in any module it imports).
DeclID LocalID = Reader.mapGlobalIDToModuleFileGlobalID(M, InterfaceID);
if (!LocalID)
return true;
// Perform a binary search to find the local redeclarations for this
// declaration (if any).
const ObjCCategoriesInfo Compare = { LocalID, 0 };
const ObjCCategoriesInfo *Result
= std::lower_bound(M.ObjCCategoriesMap,
M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap,
Compare);
if (Result == M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap ||
Result->DefinitionID != LocalID) {
// We didn't find anything. If the class definition is in this module
// file, then the module files it depends on cannot have any categories,
// so suppress further lookup.
return Reader.isDeclIDFromModule(InterfaceID, M);
}
// We found something. Dig out all of the categories.
unsigned Offset = Result->Offset;
unsigned N = M.ObjCCategories[Offset];
M.ObjCCategories[Offset++] = 0; // Don't try to deserialize again
for (unsigned I = 0; I != N; ++I)
add(cast_or_null<ObjCCategoryDecl>(
Reader.GetLocalDecl(M, M.ObjCCategories[Offset++])));
return true;
}
};
} // end anonymous namespace
void ASTReader::loadObjCCategories(serialization::GlobalDeclID ID,
ObjCInterfaceDecl *D,
unsigned PreviousGeneration) {
ObjCCategoriesVisitor Visitor(*this, D, CategoriesDeserialized, ID,
PreviousGeneration);
ModuleMgr.visit(Visitor);
}
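/// Apply F to D and, if D has already been merged into a redeclaration
/// chain, to every declaration that follows D on that chain.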
template<typename DeclT, typename Fn>
static void forAllLaterRedecls(DeclT *D, Fn F) {
F(D);
// Check whether we've already merged D into its redeclaration chain.
// If D has not yet been merged, walking back from MostRecent will not
// reach D, so walk the merged redecl chain and see whether D is on it.
auto *MostRecent = D->getMostRecentDecl();
bool Found = false;
for (auto *Redecl = MostRecent; Redecl && !Found;
Redecl = Redecl->getPreviousDecl())
Found = (Redecl == D);
// If this declaration is merged, apply the functor to all later decls.
if (Found) {
for (auto *Redecl = MostRecent; Redecl != D;
Redecl = Redecl->getPreviousDecl())
F(Redecl);
}
}
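/// Apply a DECL_UPDATES record to an already-deserialized declaration.
/// The record is a sequence of (update kind, payload) entries that is
/// read until exhausted.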
void ASTDeclReader::UpdateDecl(Decl *D,
llvm::SmallVectorImpl<serialization::DeclID> &PendingLazySpecializationIDs) {
while (Record.getIdx() < Record.size()) {
switch ((DeclUpdateKind)Record.readInt()) {
case UPD_CXX_ADDED_IMPLICIT_MEMBER: {
auto *RD = cast<CXXRecordDecl>(D);
// FIXME: If we also have an update record for instantiating the
// definition of D, we need that to happen before we get here.
Decl *MD = Record.readDecl();
assert(MD && "couldn't read decl from update record");
// FIXME: We should call addHiddenDecl instead, to add the member
// to its DeclContext.
RD->addedMember(MD);
break;
}
case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
// It will be added to the template's lazy specialization set.
PendingLazySpecializationIDs.push_back(ReadDeclID());
break;
case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: {
NamespaceDecl *Anon = ReadDeclAs<NamespaceDecl>();
// Each module has its own anonymous namespace, which is disjoint from
// any other module's anonymous namespaces, so don't attach the anonymous
// namespace at all.
if (!Record.isModule()) {
if (TranslationUnitDecl *TU = dyn_cast<TranslationUnitDecl>(D))
TU->setAnonymousNamespace(Anon);
else
cast<NamespaceDecl>(D)->setAnonymousNamespace(Anon);
}
break;
}
case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
VarDecl *VD = cast<VarDecl>(D);
VD->getMemberSpecializationInfo()->setPointOfInstantiation(
ReadSourceLocation());
uint64_t Val = Record.readInt();
if (Val && !VD->getInit()) {
VD->setInit(Record.readExpr());
if (Val > 1) { // IsInitKnownICE = 1, IsInitNotICE = 2, IsInitICE = 3
EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
Eval->CheckedICE = true;
Eval->IsICE = Val == 3;
}
}
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: {
auto Param = cast<ParmVarDecl>(D);
// We have to read the default argument regardless of whether we use it
// so that hypothetical further update records aren't messed up.
// TODO: Add a function to skip over the next expr record.
auto DefaultArg = Record.readExpr();
// Only apply the update if the parameter still has an uninstantiated
// default argument.
if (Param->hasUninstantiatedDefaultArg())
Param->setDefaultArg(DefaultArg);
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: {
auto FD = cast<FieldDecl>(D);
auto DefaultInit = Record.readExpr();
// Only apply the update if the field still has an uninstantiated
// default member initializer.
if (FD->hasInClassInitializer() && !FD->getInClassInitializer()) {
if (DefaultInit)
FD->setInClassInitializer(DefaultInit);
else
// Instantiation failed. We can get here if we serialized an AST for
// an invalid program.
FD->removeInClassInitializer();
}
break;
}
case UPD_CXX_ADDED_FUNCTION_DEFINITION: {
FunctionDecl *FD = cast<FunctionDecl>(D);
if (Reader.PendingBodies[FD]) {
// FIXME: Maybe check for ODR violations.
// It's safe to stop now because this update record is always last.
return;
}
if (Record.readInt()) {
// Maintain AST consistency: any later redeclarations of this function
// are inline if this one is. (We might have merged another declaration
// into this one.)
forAllLaterRedecls(FD, [](FunctionDecl *FD) {
FD->setImplicitlyInline();
});
}
FD->setInnerLocStart(ReadSourceLocation());
ReadFunctionDefinition(FD);
assert(Record.getIdx() == Record.size() && "lazy body must be last");
break;
}
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
auto *OldDD = RD->getCanonicalDecl()->DefinitionData;
bool HadRealDefinition =
OldDD && (OldDD->Definition != RD ||
!Reader.PendingFakeDefinitionData.count(OldDD));
ReadCXXRecordDefinition(RD, /*Update*/true);
// Visible update is handled separately.
uint64_t LexicalOffset = ReadLocalOffset();
if (!HadRealDefinition && LexicalOffset) {
Record.readLexicalDeclContextStorage(LexicalOffset, RD);
Reader.PendingFakeDefinitionData.erase(OldDD);
}
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = ReadSourceLocation();
if (MemberSpecializationInfo *MSInfo =
RD->getMemberSpecializationInfo()) {
MSInfo->setTemplateSpecializationKind(TSK);
MSInfo->setPointOfInstantiation(POI);
} else {
ClassTemplateSpecializationDecl *Spec =
cast<ClassTemplateSpecializationDecl>(RD);
Spec->setTemplateSpecializationKind(TSK);
Spec->setPointOfInstantiation(POI);
if (Record.readInt()) {
auto PartialSpec =
ReadDeclAs<ClassTemplatePartialSpecializationDecl>();
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
auto *TemplArgList = TemplateArgumentList::CreateCopy(
Reader.getContext(), TemplArgs);
// FIXME: If we already have a partial specialization set,
// check that it matches.
if (!Spec->getSpecializedTemplateOrPartial()
.is<ClassTemplatePartialSpecializationDecl *>())
Spec->setInstantiationOf(PartialSpec, TemplArgList);
}
}
RD->setTagKind((TagTypeKind)Record.readInt());
RD->setLocation(ReadSourceLocation());
RD->setLocStart(ReadSourceLocation());
RD->setBraceRange(ReadSourceRange());
if (Record.readInt()) {
AttrVec Attrs;
Record.readAttributes(Attrs);
// If the declaration already has attributes, we assume that some other
// AST file already loaded them.
if (!D->hasAttrs())
D->setAttrsImpl(Attrs, Reader.getContext());
}
break;
}
case UPD_CXX_RESOLVED_DTOR_DELETE: {
// Set the 'operator delete' directly to avoid emitting another update
// record.
auto *Del = ReadDeclAs<FunctionDecl>();
auto *First = cast<CXXDestructorDecl>(D->getCanonicalDecl());
// FIXME: Check consistency if we have an old and new operator delete.
if (!First->OperatorDelete)
First->OperatorDelete = Del;
break;
}
case UPD_CXX_RESOLVED_EXCEPTION_SPEC: {
FunctionProtoType::ExceptionSpecInfo ESI;
SmallVector<QualType, 8> ExceptionStorage;
Record.readExceptionSpec(ExceptionStorage, ESI);
// Update this declaration's exception specification, if needed.
auto *FD = cast<FunctionDecl>(D);
auto *FPT = FD->getType()->castAs<FunctionProtoType>();
// FIXME: If the exception specification is already present, check that it
// matches.
if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) {
FD->setType(Reader.getContext().getFunctionType(
FPT->getReturnType(), FPT->getParamTypes(),
FPT->getExtProtoInfo().withExceptionSpec(ESI)));
// When we get to the end of deserializing, see if there are other decls
// that we need to propagate this exception specification onto.
Reader.PendingExceptionSpecUpdates.insert(
std::make_pair(FD->getCanonicalDecl(), FD));
}
break;
}
case UPD_CXX_DEDUCED_RETURN_TYPE: {
// FIXME: Also do this when merging redecls.
QualType DeducedResultType = Record.readType();
for (auto *Redecl : merged_redecls(D)) {
// FIXME: If the return type is already deduced, check that it matches.
FunctionDecl *FD = cast<FunctionDecl>(Redecl);
Reader.getContext().adjustDeducedFunctionResultType(FD,
DeducedResultType);
}
break;
}
case UPD_DECL_MARKED_USED: {
// Maintain AST consistency: any later redeclarations are used too.
D->markUsed(Reader.getContext());
break;
}
case UPD_MANGLING_NUMBER:
Reader.getContext().setManglingNumber(cast<NamedDecl>(D),
Record.readInt());
break;
case UPD_STATIC_LOCAL_NUMBER:
Reader.getContext().setStaticLocalNumber(cast<VarDecl>(D),
Record.readInt());
break;
case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(Reader.getContext(),
ReadSourceRange()));
break;
case UPD_DECL_EXPORTED: {
unsigned SubmoduleID = readSubmoduleID();
auto *Exported = cast<NamedDecl>(D);
if (auto *TD = dyn_cast<TagDecl>(Exported))
Exported = TD->getDefinition();
Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr;
if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
Reader.getContext().mergeDefinitionIntoModule(cast<NamedDecl>(Exported),
Owner);
Reader.PendingMergedDefinitionsToDeduplicate.insert(
cast<NamedDecl>(Exported));
} else if (Owner && Owner->NameVisibility != Module::AllVisible) {
// If Owner is made visible at some later point, make this declaration
// visible too.
Reader.HiddenNamesMap[Owner].push_back(Exported);
} else {
// The declaration is now visible.
Exported->setVisibleDespiteOwningModule();
}
break;
}
case UPD_DECL_MARKED_OPENMP_DECLARETARGET:
case UPD_ADDED_ATTR_TO_RECORD:
AttrVec Attrs;
Record.readAttributes(Attrs);
assert(Attrs.size() == 1);
D->addAttr(Attrs[0]);
break;
}
}
}
Index: head/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp (revision 322855)
@@ -1,6293 +1,6296 @@
//===--- ASTWriter.cpp - AST File Writer ------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ASTWriter class, which writes AST files.
//
//===----------------------------------------------------------------------===//
#include "clang/Serialization/ASTWriter.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "MultiOnDiskHashTable.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTUnresolvedSet.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclContextInternals.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MemoryBufferCache.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/SourceManagerInternals.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/VersionTuple.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Sema/IdentifierResolver.h"
#include "clang/Sema/ObjCMethodList.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Weak.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/Module.h"
#include "clang/Serialization/ModuleFileExtension.h"
#include "clang/Serialization/SerializationDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitCodes.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <limits>
#include <new>
#include <tuple>
#include <utility>
using namespace clang;
using namespace clang::serialization;
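// Helpers that expose a vector's elements as a raw byte string, used when
// emitting table contents as bitstream blobs.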
template <typename T, typename Allocator>
static StringRef bytes(const std::vector<T, Allocator> &v) {
if (v.empty()) return StringRef();
return StringRef(reinterpret_cast<const char*>(&v[0]),
sizeof(T) * v.size());
}
template <typename T>
static StringRef bytes(const SmallVectorImpl<T> &v) {
return StringRef(reinterpret_cast<const char*>(v.data()),
sizeof(T) * v.size());
}
//===----------------------------------------------------------------------===//
// Type serialization
//===----------------------------------------------------------------------===//
namespace clang {
class ASTTypeWriter {
ASTWriter &Writer;
ASTRecordWriter Record;
/// \brief Type code that corresponds to the record generated.
TypeCode Code;
/// \brief Abbreviation to use for the record, if any.
unsigned AbbrevToUse;
public:
ASTTypeWriter(ASTWriter &Writer, ASTWriter::RecordDataImpl &Record)
: Writer(Writer), Record(Writer, Record), Code((TypeCode)0), AbbrevToUse(0) { }
uint64_t Emit() {
return Record.Emit(Code, AbbrevToUse);
}
void Visit(QualType T) {
if (T.hasLocalNonFastQualifiers()) {
Qualifiers Qs = T.getLocalQualifiers();
Record.AddTypeRef(T.getLocalUnqualifiedType());
Record.push_back(Qs.getAsOpaqueValue());
Code = TYPE_EXT_QUAL;
AbbrevToUse = Writer.TypeExtQualAbbrev;
} else {
switch (T->getTypeClass()) {
// For all of the concrete, non-dependent types, call the
// appropriate visitor function.
#define TYPE(Class, Base) \
case Type::Class: Visit##Class##Type(cast<Class##Type>(T)); break;
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.def"
}
}
}
void VisitArrayType(const ArrayType *T);
void VisitFunctionType(const FunctionType *T);
void VisitTagType(const TagType *T);
#define TYPE(Class, Base) void Visit##Class##Type(const Class##Type *T);
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.def"
};
} // end namespace clang
void ASTTypeWriter::VisitBuiltinType(const BuiltinType *T) {
llvm_unreachable("Built-in types are never serialized");
}
void ASTTypeWriter::VisitComplexType(const ComplexType *T) {
Record.AddTypeRef(T->getElementType());
Code = TYPE_COMPLEX;
}
void ASTTypeWriter::VisitPointerType(const PointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Code = TYPE_POINTER;
}
void ASTTypeWriter::VisitDecayedType(const DecayedType *T) {
Record.AddTypeRef(T->getOriginalType());
Code = TYPE_DECAYED;
}
void ASTTypeWriter::VisitAdjustedType(const AdjustedType *T) {
Record.AddTypeRef(T->getOriginalType());
Record.AddTypeRef(T->getAdjustedType());
Code = TYPE_ADJUSTED;
}
void ASTTypeWriter::VisitBlockPointerType(const BlockPointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Code = TYPE_BLOCK_POINTER;
}
void ASTTypeWriter::VisitLValueReferenceType(const LValueReferenceType *T) {
Record.AddTypeRef(T->getPointeeTypeAsWritten());
Record.push_back(T->isSpelledAsLValue());
Code = TYPE_LVALUE_REFERENCE;
}
void ASTTypeWriter::VisitRValueReferenceType(const RValueReferenceType *T) {
Record.AddTypeRef(T->getPointeeTypeAsWritten());
Code = TYPE_RVALUE_REFERENCE;
}
void ASTTypeWriter::VisitMemberPointerType(const MemberPointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Record.AddTypeRef(QualType(T->getClass(), 0));
Code = TYPE_MEMBER_POINTER;
}
void ASTTypeWriter::VisitArrayType(const ArrayType *T) {
Record.AddTypeRef(T->getElementType());
Record.push_back(T->getSizeModifier()); // FIXME: stable values
Record.push_back(T->getIndexTypeCVRQualifiers()); // FIXME: stable values
}
void ASTTypeWriter::VisitConstantArrayType(const ConstantArrayType *T) {
VisitArrayType(T);
Record.AddAPInt(T->getSize());
Code = TYPE_CONSTANT_ARRAY;
}
void ASTTypeWriter::VisitIncompleteArrayType(const IncompleteArrayType *T) {
VisitArrayType(T);
Code = TYPE_INCOMPLETE_ARRAY;
}
void ASTTypeWriter::VisitVariableArrayType(const VariableArrayType *T) {
VisitArrayType(T);
Record.AddSourceLocation(T->getLBracketLoc());
Record.AddSourceLocation(T->getRBracketLoc());
Record.AddStmt(T->getSizeExpr());
Code = TYPE_VARIABLE_ARRAY;
}
void ASTTypeWriter::VisitVectorType(const VectorType *T) {
Record.AddTypeRef(T->getElementType());
Record.push_back(T->getNumElements());
Record.push_back(T->getVectorKind());
Code = TYPE_VECTOR;
}
void ASTTypeWriter::VisitExtVectorType(const ExtVectorType *T) {
VisitVectorType(T);
Code = TYPE_EXT_VECTOR;
}
void ASTTypeWriter::VisitFunctionType(const FunctionType *T) {
Record.AddTypeRef(T->getReturnType());
FunctionType::ExtInfo C = T->getExtInfo();
Record.push_back(C.getNoReturn());
Record.push_back(C.getHasRegParm());
Record.push_back(C.getRegParm());
// FIXME: need to stabilize encoding of calling convention...
Record.push_back(C.getCC());
Record.push_back(C.getProducesResult());
Record.push_back(C.getNoCallerSavedRegs());
if (C.getHasRegParm() || C.getRegParm() || C.getProducesResult())
AbbrevToUse = 0;
}
void ASTTypeWriter::VisitFunctionNoProtoType(const FunctionNoProtoType *T) {
VisitFunctionType(T);
Code = TYPE_FUNCTION_NO_PROTO;
}
static void addExceptionSpec(const FunctionProtoType *T,
ASTRecordWriter &Record) {
Record.push_back(T->getExceptionSpecType());
if (T->getExceptionSpecType() == EST_Dynamic) {
Record.push_back(T->getNumExceptions());
for (unsigned I = 0, N = T->getNumExceptions(); I != N; ++I)
Record.AddTypeRef(T->getExceptionType(I));
} else if (T->getExceptionSpecType() == EST_ComputedNoexcept) {
Record.AddStmt(T->getNoexceptExpr());
} else if (T->getExceptionSpecType() == EST_Uninstantiated) {
Record.AddDeclRef(T->getExceptionSpecDecl());
Record.AddDeclRef(T->getExceptionSpecTemplate());
} else if (T->getExceptionSpecType() == EST_Unevaluated) {
Record.AddDeclRef(T->getExceptionSpecDecl());
}
}
void ASTTypeWriter::VisitFunctionProtoType(const FunctionProtoType *T) {
VisitFunctionType(T);
Record.push_back(T->isVariadic());
Record.push_back(T->hasTrailingReturn());
Record.push_back(T->getTypeQuals());
Record.push_back(static_cast<unsigned>(T->getRefQualifier()));
addExceptionSpec(T, Record);
Record.push_back(T->getNumParams());
for (unsigned I = 0, N = T->getNumParams(); I != N; ++I)
Record.AddTypeRef(T->getParamType(I));
if (T->hasExtParameterInfos()) {
for (unsigned I = 0, N = T->getNumParams(); I != N; ++I)
Record.push_back(T->getExtParameterInfo(I).getOpaqueValue());
}
if (T->isVariadic() || T->hasTrailingReturn() || T->getTypeQuals() ||
T->getRefQualifier() || T->getExceptionSpecType() != EST_None ||
T->hasExtParameterInfos())
AbbrevToUse = 0;
Code = TYPE_FUNCTION_PROTO;
}
void ASTTypeWriter::VisitUnresolvedUsingType(const UnresolvedUsingType *T) {
Record.AddDeclRef(T->getDecl());
Code = TYPE_UNRESOLVED_USING;
}
void ASTTypeWriter::VisitTypedefType(const TypedefType *T) {
Record.AddDeclRef(T->getDecl());
assert(!T->isCanonicalUnqualified() && "Invalid typedef ?");
Record.AddTypeRef(T->getCanonicalTypeInternal());
Code = TYPE_TYPEDEF;
}
void ASTTypeWriter::VisitTypeOfExprType(const TypeOfExprType *T) {
Record.AddStmt(T->getUnderlyingExpr());
Code = TYPE_TYPEOF_EXPR;
}
void ASTTypeWriter::VisitTypeOfType(const TypeOfType *T) {
Record.AddTypeRef(T->getUnderlyingType());
Code = TYPE_TYPEOF;
}
void ASTTypeWriter::VisitDecltypeType(const DecltypeType *T) {
Record.AddTypeRef(T->getUnderlyingType());
Record.AddStmt(T->getUnderlyingExpr());
Code = TYPE_DECLTYPE;
}
void ASTTypeWriter::VisitUnaryTransformType(const UnaryTransformType *T) {
Record.AddTypeRef(T->getBaseType());
Record.AddTypeRef(T->getUnderlyingType());
Record.push_back(T->getUTTKind());
Code = TYPE_UNARY_TRANSFORM;
}
void ASTTypeWriter::VisitAutoType(const AutoType *T) {
Record.AddTypeRef(T->getDeducedType());
Record.push_back((unsigned)T->getKeyword());
if (T->getDeducedType().isNull())
Record.push_back(T->isDependentType());
Code = TYPE_AUTO;
}
void ASTTypeWriter::VisitDeducedTemplateSpecializationType(
const DeducedTemplateSpecializationType *T) {
Record.AddTemplateName(T->getTemplateName());
Record.AddTypeRef(T->getDeducedType());
if (T->getDeducedType().isNull())
Record.push_back(T->isDependentType());
Code = TYPE_DEDUCED_TEMPLATE_SPECIALIZATION;
}
void ASTTypeWriter::VisitTagType(const TagType *T) {
Record.push_back(T->isDependentType());
Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
assert(!T->isBeingDefined() &&
"Cannot serialize in the middle of a type definition");
}
void ASTTypeWriter::VisitRecordType(const RecordType *T) {
VisitTagType(T);
Code = TYPE_RECORD;
}
void ASTTypeWriter::VisitEnumType(const EnumType *T) {
VisitTagType(T);
Code = TYPE_ENUM;
}
void ASTTypeWriter::VisitAttributedType(const AttributedType *T) {
Record.AddTypeRef(T->getModifiedType());
Record.AddTypeRef(T->getEquivalentType());
Record.push_back(T->getAttrKind());
Code = TYPE_ATTRIBUTED;
}
void
ASTTypeWriter::VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T) {
Record.AddTypeRef(QualType(T->getReplacedParameter(), 0));
Record.AddTypeRef(T->getReplacementType());
Code = TYPE_SUBST_TEMPLATE_TYPE_PARM;
}
void
ASTTypeWriter::VisitSubstTemplateTypeParmPackType(
const SubstTemplateTypeParmPackType *T) {
Record.AddTypeRef(QualType(T->getReplacedParameter(), 0));
Record.AddTemplateArgument(T->getArgumentPack());
Code = TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK;
}
void
ASTTypeWriter::VisitTemplateSpecializationType(
const TemplateSpecializationType *T) {
Record.push_back(T->isDependentType());
Record.AddTemplateName(T->getTemplateName());
Record.push_back(T->getNumArgs());
for (const auto &ArgI : *T)
Record.AddTemplateArgument(ArgI);
Record.AddTypeRef(T->isTypeAlias() ? T->getAliasedType()
: T->isCanonicalUnqualified()
? QualType()
: T->getCanonicalTypeInternal());
Code = TYPE_TEMPLATE_SPECIALIZATION;
}
void
ASTTypeWriter::VisitDependentSizedArrayType(const DependentSizedArrayType *T) {
VisitArrayType(T);
Record.AddStmt(T->getSizeExpr());
Record.AddSourceRange(T->getBracketsRange());
Code = TYPE_DEPENDENT_SIZED_ARRAY;
}
void
ASTTypeWriter::VisitDependentSizedExtVectorType(
const DependentSizedExtVectorType *T) {
Record.AddTypeRef(T->getElementType());
Record.AddStmt(T->getSizeExpr());
Record.AddSourceLocation(T->getAttributeLoc());
Code = TYPE_DEPENDENT_SIZED_EXT_VECTOR;
}
void
ASTTypeWriter::VisitTemplateTypeParmType(const TemplateTypeParmType *T) {
Record.push_back(T->getDepth());
Record.push_back(T->getIndex());
Record.push_back(T->isParameterPack());
Record.AddDeclRef(T->getDecl());
Code = TYPE_TEMPLATE_TYPE_PARM;
}
void
ASTTypeWriter::VisitDependentNameType(const DependentNameType *T) {
Record.push_back(T->getKeyword());
Record.AddNestedNameSpecifier(T->getQualifier());
Record.AddIdentifierRef(T->getIdentifier());
Record.AddTypeRef(
T->isCanonicalUnqualified() ? QualType() : T->getCanonicalTypeInternal());
Code = TYPE_DEPENDENT_NAME;
}
void
ASTTypeWriter::VisitDependentTemplateSpecializationType(
const DependentTemplateSpecializationType *T) {
Record.push_back(T->getKeyword());
Record.AddNestedNameSpecifier(T->getQualifier());
Record.AddIdentifierRef(T->getIdentifier());
Record.push_back(T->getNumArgs());
for (const auto &I : *T)
Record.AddTemplateArgument(I);
Code = TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION;
}
void ASTTypeWriter::VisitPackExpansionType(const PackExpansionType *T) {
Record.AddTypeRef(T->getPattern());
if (Optional<unsigned> NumExpansions = T->getNumExpansions())
Record.push_back(*NumExpansions + 1);
else
Record.push_back(0);
Code = TYPE_PACK_EXPANSION;
}
void ASTTypeWriter::VisitParenType(const ParenType *T) {
Record.AddTypeRef(T->getInnerType());
Code = TYPE_PAREN;
}
void ASTTypeWriter::VisitElaboratedType(const ElaboratedType *T) {
Record.push_back(T->getKeyword());
Record.AddNestedNameSpecifier(T->getQualifier());
Record.AddTypeRef(T->getNamedType());
Code = TYPE_ELABORATED;
}
void ASTTypeWriter::VisitInjectedClassNameType(const InjectedClassNameType *T) {
Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
Record.AddTypeRef(T->getInjectedSpecializationType());
Code = TYPE_INJECTED_CLASS_NAME;
}
void ASTTypeWriter::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
Code = TYPE_OBJC_INTERFACE;
}
void ASTTypeWriter::VisitObjCTypeParamType(const ObjCTypeParamType *T) {
Record.AddDeclRef(T->getDecl());
Record.push_back(T->getNumProtocols());
for (const auto *I : T->quals())
Record.AddDeclRef(I);
Code = TYPE_OBJC_TYPE_PARAM;
}
void ASTTypeWriter::VisitObjCObjectType(const ObjCObjectType *T) {
Record.AddTypeRef(T->getBaseType());
Record.push_back(T->getTypeArgsAsWritten().size());
for (auto TypeArg : T->getTypeArgsAsWritten())
Record.AddTypeRef(TypeArg);
Record.push_back(T->getNumProtocols());
for (const auto *I : T->quals())
Record.AddDeclRef(I);
Record.push_back(T->isKindOfTypeAsWritten());
Code = TYPE_OBJC_OBJECT;
}
void
ASTTypeWriter::VisitObjCObjectPointerType(const ObjCObjectPointerType *T) {
Record.AddTypeRef(T->getPointeeType());
Code = TYPE_OBJC_OBJECT_POINTER;
}
void
ASTTypeWriter::VisitAtomicType(const AtomicType *T) {
Record.AddTypeRef(T->getValueType());
Code = TYPE_ATOMIC;
}
void
ASTTypeWriter::VisitPipeType(const PipeType *T) {
Record.AddTypeRef(T->getElementType());
Record.push_back(T->isReadOnly());
Code = TYPE_PIPE;
}
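// TypeLocWriter emits the source-location data that accompanies each
// serialized type, mirroring the TypeLoc hierarchy.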
namespace {
class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> {
ASTRecordWriter &Record;
public:
TypeLocWriter(ASTRecordWriter &Record)
: Record(Record) { }
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
void Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc);
#include "clang/AST/TypeLocNodes.def"
void VisitArrayTypeLoc(ArrayTypeLoc TyLoc);
void VisitFunctionTypeLoc(FunctionTypeLoc TyLoc);
};
} // end anonymous namespace
void TypeLocWriter::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
Record.AddSourceLocation(TL.getBuiltinLoc());
if (TL.needsExtraLocalData()) {
Record.push_back(TL.getWrittenTypeSpec());
Record.push_back(TL.getWrittenSignSpec());
Record.push_back(TL.getWrittenWidthSpec());
Record.push_back(TL.hasModeAttr());
}
}
void TypeLocWriter::VisitComplexTypeLoc(ComplexTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitPointerTypeLoc(PointerTypeLoc TL) {
Record.AddSourceLocation(TL.getStarLoc());
}
void TypeLocWriter::VisitDecayedTypeLoc(DecayedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
Record.AddSourceLocation(TL.getCaretLoc());
}
void TypeLocWriter::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
Record.AddSourceLocation(TL.getAmpLoc());
}
void TypeLocWriter::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
Record.AddSourceLocation(TL.getAmpAmpLoc());
}
void TypeLocWriter::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
Record.AddSourceLocation(TL.getStarLoc());
Record.AddTypeSourceInfo(TL.getClassTInfo());
}
void TypeLocWriter::VisitArrayTypeLoc(ArrayTypeLoc TL) {
Record.AddSourceLocation(TL.getLBracketLoc());
Record.AddSourceLocation(TL.getRBracketLoc());
Record.push_back(TL.getSizeExpr() ? 1 : 0);
if (TL.getSizeExpr())
Record.AddStmt(TL.getSizeExpr());
}
void TypeLocWriter::VisitConstantArrayTypeLoc(ConstantArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitIncompleteArrayTypeLoc(IncompleteArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitVariableArrayTypeLoc(VariableArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitDependentSizedArrayTypeLoc(
DependentSizedArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocWriter::VisitDependentSizedExtVectorTypeLoc(
DependentSizedExtVectorTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitVectorTypeLoc(VectorTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) {
Record.AddSourceLocation(TL.getLocalRangeBegin());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
Record.AddSourceRange(TL.getExceptionSpecRange());
Record.AddSourceLocation(TL.getLocalRangeEnd());
for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i)
Record.AddDeclRef(TL.getParam(i));
}
void TypeLocWriter::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocWriter::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocWriter::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) {
if (TL.getNumProtocols()) {
Record.AddSourceLocation(TL.getProtocolLAngleLoc());
Record.AddSourceLocation(TL.getProtocolRAngleLoc());
}
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
Record.AddSourceLocation(TL.getProtocolLoc(i));
}
void TypeLocWriter::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
Record.AddSourceLocation(TL.getTypeofLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
Record.AddSourceLocation(TL.getTypeofLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
Record.AddTypeSourceInfo(TL.getUnderlyingTInfo());
}
void TypeLocWriter::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
Record.AddSourceLocation(TL.getKWLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
Record.AddTypeSourceInfo(TL.getUnderlyingTInfo());
}
void TypeLocWriter::VisitAutoTypeLoc(AutoTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDeducedTemplateSpecializationTypeLoc(
DeducedTemplateSpecializationTypeLoc TL) {
Record.AddSourceLocation(TL.getTemplateNameLoc());
}
void TypeLocWriter::VisitRecordTypeLoc(RecordTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitEnumTypeLoc(EnumTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitAttributedTypeLoc(AttributedTypeLoc TL) {
Record.AddSourceLocation(TL.getAttrNameLoc());
if (TL.hasAttrOperand()) {
SourceRange range = TL.getAttrOperandParensRange();
Record.AddSourceLocation(range.getBegin());
Record.AddSourceLocation(range.getEnd());
}
if (TL.hasAttrExprOperand()) {
Expr *operand = TL.getAttrExprOperand();
Record.push_back(operand ? 1 : 0);
if (operand) Record.AddStmt(operand);
} else if (TL.hasAttrEnumOperand()) {
Record.AddSourceLocation(TL.getAttrEnumOperandLoc());
}
}
void TypeLocWriter::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitSubstTemplateTypeParmTypeLoc(
SubstTemplateTypeParmTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitSubstTemplateTypeParmPackTypeLoc(
SubstTemplateTypeParmPackTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTemplateSpecializationTypeLoc(
TemplateSpecializationTypeLoc TL) {
Record.AddSourceLocation(TL.getTemplateKeywordLoc());
Record.AddSourceLocation(TL.getTemplateNameLoc());
Record.AddSourceLocation(TL.getLAngleLoc());
Record.AddSourceLocation(TL.getRAngleLoc());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
Record.AddTemplateArgumentLocInfo(TL.getArgLoc(i).getArgument().getKind(),
TL.getArgLoc(i).getLocInfo());
}
void TypeLocWriter::VisitParenTypeLoc(ParenTypeLoc TL) {
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
}
void TypeLocWriter::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
Record.AddSourceLocation(TL.getTemplateKeywordLoc());
Record.AddSourceLocation(TL.getTemplateNameLoc());
Record.AddSourceLocation(TL.getLAngleLoc());
Record.AddSourceLocation(TL.getRAngleLoc());
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I).getArgument().getKind(),
TL.getArgLoc(I).getLocInfo());
}
void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
Record.AddSourceLocation(TL.getEllipsisLoc());
}
void TypeLocWriter::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
Record.push_back(TL.hasBaseTypeAsWritten());
Record.AddSourceLocation(TL.getTypeArgsLAngleLoc());
Record.AddSourceLocation(TL.getTypeArgsRAngleLoc());
for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i)
Record.AddTypeSourceInfo(TL.getTypeArgTInfo(i));
Record.AddSourceLocation(TL.getProtocolLAngleLoc());
Record.AddSourceLocation(TL.getProtocolRAngleLoc());
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
Record.AddSourceLocation(TL.getProtocolLoc(i));
}
void TypeLocWriter::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
Record.AddSourceLocation(TL.getStarLoc());
}
void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) {
Record.AddSourceLocation(TL.getKWLoc());
Record.AddSourceLocation(TL.getLParenLoc());
Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) {
Record.AddSourceLocation(TL.getKWLoc());
}
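// Abbreviations give common record shapes a compact bitstream encoding.
// Fields pinned to a literal value (e.g. BitCodeAbbrevOp(0)) occupy no
// bits, so an abbreviation only applies when the emitted values match;
// otherwise the type writers above reset AbbrevToUse to 0 and fall back
// to the generic encoding.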
void ASTWriter::WriteTypeAbbrevs() {
using namespace llvm;
std::shared_ptr<BitCodeAbbrev> Abv;
// Abbreviation for TYPE_EXT_QUAL
Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(BitCodeAbbrevOp(serialization::TYPE_EXT_QUAL));
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 3)); // Quals
TypeExtQualAbbrev = Stream.EmitAbbrev(std::move(Abv));
// Abbreviation for TYPE_FUNCTION_PROTO
Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(BitCodeAbbrevOp(serialization::TYPE_FUNCTION_PROTO));
// FunctionType
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ReturnType
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // NoReturn
Abv->Add(BitCodeAbbrevOp(0)); // HasRegParm
Abv->Add(BitCodeAbbrevOp(0)); // RegParm
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // CC
Abv->Add(BitCodeAbbrevOp(0)); // ProducesResult
Abv->Add(BitCodeAbbrevOp(0)); // NoCallerSavedRegs
// FunctionProtoType
Abv->Add(BitCodeAbbrevOp(0)); // IsVariadic
Abv->Add(BitCodeAbbrevOp(0)); // HasTrailingReturn
Abv->Add(BitCodeAbbrevOp(0)); // TypeQuals
Abv->Add(BitCodeAbbrevOp(0)); // RefQualifier
Abv->Add(BitCodeAbbrevOp(EST_None)); // ExceptionSpec
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumParams
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Params
TypeFunctionProtoAbbrev = Stream.EmitAbbrev(std::move(Abv));
}
//===----------------------------------------------------------------------===//
// ASTWriter Implementation
//===----------------------------------------------------------------------===//
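// BLOCKINFO helpers: record human-readable names for block and record IDs
// so that tools such as llvm-bcanalyzer can pretty-print the AST file.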
static void EmitBlockID(unsigned ID, const char *Name,
llvm::BitstreamWriter &Stream,
ASTWriter::RecordDataImpl &Record) {
Record.clear();
Record.push_back(ID);
Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
// Emit the block name if present.
if (!Name || Name[0] == 0)
return;
Record.clear();
while (*Name)
Record.push_back(*Name++);
Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
}
static void EmitRecordID(unsigned ID, const char *Name,
llvm::BitstreamWriter &Stream,
ASTWriter::RecordDataImpl &Record) {
Record.clear();
Record.push_back(ID);
while (*Name)
Record.push_back(*Name++);
Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
}
static void AddStmtsExprs(llvm::BitstreamWriter &Stream,
ASTWriter::RecordDataImpl &Record) {
#define RECORD(X) EmitRecordID(X, #X, Stream, Record)
RECORD(STMT_STOP);
RECORD(STMT_NULL_PTR);
RECORD(STMT_REF_PTR);
RECORD(STMT_NULL);
RECORD(STMT_COMPOUND);
RECORD(STMT_CASE);
RECORD(STMT_DEFAULT);
RECORD(STMT_LABEL);
RECORD(STMT_ATTRIBUTED);
RECORD(STMT_IF);
RECORD(STMT_SWITCH);
RECORD(STMT_WHILE);
RECORD(STMT_DO);
RECORD(STMT_FOR);
RECORD(STMT_GOTO);
RECORD(STMT_INDIRECT_GOTO);
RECORD(STMT_CONTINUE);
RECORD(STMT_BREAK);
RECORD(STMT_RETURN);
RECORD(STMT_DECL);
RECORD(STMT_GCCASM);
RECORD(STMT_MSASM);
RECORD(EXPR_PREDEFINED);
RECORD(EXPR_DECL_REF);
RECORD(EXPR_INTEGER_LITERAL);
RECORD(EXPR_FLOATING_LITERAL);
RECORD(EXPR_IMAGINARY_LITERAL);
RECORD(EXPR_STRING_LITERAL);
RECORD(EXPR_CHARACTER_LITERAL);
RECORD(EXPR_PAREN);
RECORD(EXPR_PAREN_LIST);
RECORD(EXPR_UNARY_OPERATOR);
RECORD(EXPR_SIZEOF_ALIGN_OF);
RECORD(EXPR_ARRAY_SUBSCRIPT);
RECORD(EXPR_CALL);
RECORD(EXPR_MEMBER);
RECORD(EXPR_BINARY_OPERATOR);
RECORD(EXPR_COMPOUND_ASSIGN_OPERATOR);
RECORD(EXPR_CONDITIONAL_OPERATOR);
RECORD(EXPR_IMPLICIT_CAST);
RECORD(EXPR_CSTYLE_CAST);
RECORD(EXPR_COMPOUND_LITERAL);
RECORD(EXPR_EXT_VECTOR_ELEMENT);
RECORD(EXPR_INIT_LIST);
RECORD(EXPR_DESIGNATED_INIT);
RECORD(EXPR_DESIGNATED_INIT_UPDATE);
RECORD(EXPR_IMPLICIT_VALUE_INIT);
RECORD(EXPR_NO_INIT);
RECORD(EXPR_VA_ARG);
RECORD(EXPR_ADDR_LABEL);
RECORD(EXPR_STMT);
RECORD(EXPR_CHOOSE);
RECORD(EXPR_GNU_NULL);
RECORD(EXPR_SHUFFLE_VECTOR);
RECORD(EXPR_BLOCK);
RECORD(EXPR_GENERIC_SELECTION);
RECORD(EXPR_OBJC_STRING_LITERAL);
RECORD(EXPR_OBJC_BOXED_EXPRESSION);
RECORD(EXPR_OBJC_ARRAY_LITERAL);
RECORD(EXPR_OBJC_DICTIONARY_LITERAL);
RECORD(EXPR_OBJC_ENCODE);
RECORD(EXPR_OBJC_SELECTOR_EXPR);
RECORD(EXPR_OBJC_PROTOCOL_EXPR);
RECORD(EXPR_OBJC_IVAR_REF_EXPR);
RECORD(EXPR_OBJC_PROPERTY_REF_EXPR);
RECORD(EXPR_OBJC_KVC_REF_EXPR);
RECORD(EXPR_OBJC_MESSAGE_EXPR);
RECORD(STMT_OBJC_FOR_COLLECTION);
RECORD(STMT_OBJC_CATCH);
RECORD(STMT_OBJC_FINALLY);
RECORD(STMT_OBJC_AT_TRY);
RECORD(STMT_OBJC_AT_SYNCHRONIZED);
RECORD(STMT_OBJC_AT_THROW);
RECORD(EXPR_OBJC_BOOL_LITERAL);
RECORD(STMT_CXX_CATCH);
RECORD(STMT_CXX_TRY);
RECORD(STMT_CXX_FOR_RANGE);
RECORD(EXPR_CXX_OPERATOR_CALL);
RECORD(EXPR_CXX_MEMBER_CALL);
RECORD(EXPR_CXX_CONSTRUCT);
RECORD(EXPR_CXX_TEMPORARY_OBJECT);
RECORD(EXPR_CXX_STATIC_CAST);
RECORD(EXPR_CXX_DYNAMIC_CAST);
RECORD(EXPR_CXX_REINTERPRET_CAST);
RECORD(EXPR_CXX_CONST_CAST);
RECORD(EXPR_CXX_FUNCTIONAL_CAST);
RECORD(EXPR_USER_DEFINED_LITERAL);
RECORD(EXPR_CXX_STD_INITIALIZER_LIST);
RECORD(EXPR_CXX_BOOL_LITERAL);
RECORD(EXPR_CXX_NULL_PTR_LITERAL);
RECORD(EXPR_CXX_TYPEID_EXPR);
RECORD(EXPR_CXX_TYPEID_TYPE);
RECORD(EXPR_CXX_THIS);
RECORD(EXPR_CXX_THROW);
RECORD(EXPR_CXX_DEFAULT_ARG);
RECORD(EXPR_CXX_DEFAULT_INIT);
RECORD(EXPR_CXX_BIND_TEMPORARY);
RECORD(EXPR_CXX_SCALAR_VALUE_INIT);
RECORD(EXPR_CXX_NEW);
RECORD(EXPR_CXX_DELETE);
RECORD(EXPR_CXX_PSEUDO_DESTRUCTOR);
RECORD(EXPR_EXPR_WITH_CLEANUPS);
RECORD(EXPR_CXX_DEPENDENT_SCOPE_MEMBER);
RECORD(EXPR_CXX_DEPENDENT_SCOPE_DECL_REF);
RECORD(EXPR_CXX_UNRESOLVED_CONSTRUCT);
RECORD(EXPR_CXX_UNRESOLVED_MEMBER);
RECORD(EXPR_CXX_UNRESOLVED_LOOKUP);
RECORD(EXPR_CXX_EXPRESSION_TRAIT);
RECORD(EXPR_CXX_NOEXCEPT);
RECORD(EXPR_OPAQUE_VALUE);
RECORD(EXPR_BINARY_CONDITIONAL_OPERATOR);
RECORD(EXPR_TYPE_TRAIT);
RECORD(EXPR_ARRAY_TYPE_TRAIT);
RECORD(EXPR_PACK_EXPANSION);
RECORD(EXPR_SIZEOF_PACK);
RECORD(EXPR_SUBST_NON_TYPE_TEMPLATE_PARM);
RECORD(EXPR_SUBST_NON_TYPE_TEMPLATE_PARM_PACK);
RECORD(EXPR_FUNCTION_PARM_PACK);
RECORD(EXPR_MATERIALIZE_TEMPORARY);
RECORD(EXPR_CUDA_KERNEL_CALL);
RECORD(EXPR_CXX_UUIDOF_EXPR);
RECORD(EXPR_CXX_UUIDOF_TYPE);
RECORD(EXPR_LAMBDA);
#undef RECORD
}
void ASTWriter::WriteBlockInfoBlock() {
RecordData Record;
Stream.EnterBlockInfoBlock();
#define BLOCK(X) EmitBlockID(X ## _ID, #X, Stream, Record)
#define RECORD(X) EmitRecordID(X, #X, Stream, Record)
// Control Block.
BLOCK(CONTROL_BLOCK);
RECORD(METADATA);
RECORD(MODULE_NAME);
RECORD(MODULE_DIRECTORY);
RECORD(MODULE_MAP_FILE);
RECORD(IMPORTS);
RECORD(ORIGINAL_FILE);
RECORD(ORIGINAL_PCH_DIR);
RECORD(ORIGINAL_FILE_ID);
RECORD(INPUT_FILE_OFFSETS);
BLOCK(OPTIONS_BLOCK);
RECORD(LANGUAGE_OPTIONS);
RECORD(TARGET_OPTIONS);
RECORD(FILE_SYSTEM_OPTIONS);
RECORD(HEADER_SEARCH_OPTIONS);
RECORD(PREPROCESSOR_OPTIONS);
BLOCK(INPUT_FILES_BLOCK);
RECORD(INPUT_FILE);
// AST Top-Level Block.
BLOCK(AST_BLOCK);
RECORD(TYPE_OFFSET);
RECORD(DECL_OFFSET);
RECORD(IDENTIFIER_OFFSET);
RECORD(IDENTIFIER_TABLE);
RECORD(EAGERLY_DESERIALIZED_DECLS);
RECORD(MODULAR_CODEGEN_DECLS);
RECORD(SPECIAL_TYPES);
RECORD(STATISTICS);
RECORD(TENTATIVE_DEFINITIONS);
RECORD(SELECTOR_OFFSETS);
RECORD(METHOD_POOL);
RECORD(PP_COUNTER_VALUE);
RECORD(SOURCE_LOCATION_OFFSETS);
RECORD(SOURCE_LOCATION_PRELOADS);
RECORD(EXT_VECTOR_DECLS);
RECORD(UNUSED_FILESCOPED_DECLS);
RECORD(PPD_ENTITIES_OFFSETS);
RECORD(VTABLE_USES);
RECORD(REFERENCED_SELECTOR_POOL);
RECORD(TU_UPDATE_LEXICAL);
RECORD(SEMA_DECL_REFS);
RECORD(WEAK_UNDECLARED_IDENTIFIERS);
RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
RECORD(UPDATE_VISIBLE);
RECORD(DECL_UPDATE_OFFSETS);
RECORD(DECL_UPDATES);
RECORD(CUDA_SPECIAL_DECL_REFS);
RECORD(HEADER_SEARCH_TABLE);
RECORD(FP_PRAGMA_OPTIONS);
RECORD(OPENCL_EXTENSIONS);
RECORD(OPENCL_EXTENSION_TYPES);
RECORD(OPENCL_EXTENSION_DECLS);
RECORD(DELEGATING_CTORS);
RECORD(KNOWN_NAMESPACES);
RECORD(MODULE_OFFSET_MAP);
RECORD(SOURCE_MANAGER_LINE_TABLE);
RECORD(OBJC_CATEGORIES_MAP);
RECORD(FILE_SORTED_DECLS);
RECORD(IMPORTED_MODULES);
RECORD(OBJC_CATEGORIES);
RECORD(MACRO_OFFSET);
RECORD(INTERESTING_IDENTIFIERS);
RECORD(UNDEFINED_BUT_USED);
RECORD(LATE_PARSED_TEMPLATE);
RECORD(OPTIMIZE_PRAGMA_OPTIONS);
RECORD(MSSTRUCT_PRAGMA_OPTIONS);
RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS);
RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES);
RECORD(DELETE_EXPRS_TO_ANALYZE);
RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
RECORD(PP_CONDITIONAL_STACK);
// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
RECORD(SM_SLOC_FILE_ENTRY);
RECORD(SM_SLOC_BUFFER_ENTRY);
RECORD(SM_SLOC_BUFFER_BLOB);
RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED);
RECORD(SM_SLOC_EXPANSION_ENTRY);
// Preprocessor Block.
BLOCK(PREPROCESSOR_BLOCK);
RECORD(PP_MACRO_DIRECTIVE_HISTORY);
RECORD(PP_MACRO_FUNCTION_LIKE);
RECORD(PP_MACRO_OBJECT_LIKE);
RECORD(PP_MODULE_MACRO);
RECORD(PP_TOKEN);
// Submodule Block.
BLOCK(SUBMODULE_BLOCK);
RECORD(SUBMODULE_METADATA);
RECORD(SUBMODULE_DEFINITION);
RECORD(SUBMODULE_UMBRELLA_HEADER);
RECORD(SUBMODULE_HEADER);
RECORD(SUBMODULE_TOPHEADER);
RECORD(SUBMODULE_UMBRELLA_DIR);
RECORD(SUBMODULE_IMPORTS);
RECORD(SUBMODULE_EXPORTS);
RECORD(SUBMODULE_REQUIRES);
RECORD(SUBMODULE_EXCLUDED_HEADER);
RECORD(SUBMODULE_LINK_LIBRARY);
RECORD(SUBMODULE_CONFIG_MACRO);
RECORD(SUBMODULE_CONFLICT);
RECORD(SUBMODULE_PRIVATE_HEADER);
RECORD(SUBMODULE_TEXTUAL_HEADER);
RECORD(SUBMODULE_PRIVATE_TEXTUAL_HEADER);
RECORD(SUBMODULE_INITIALIZERS);
// Comments Block.
BLOCK(COMMENTS_BLOCK);
RECORD(COMMENTS_RAW_COMMENT);
// Decls and Types block.
BLOCK(DECLTYPES_BLOCK);
RECORD(TYPE_EXT_QUAL);
RECORD(TYPE_COMPLEX);
RECORD(TYPE_POINTER);
RECORD(TYPE_BLOCK_POINTER);
RECORD(TYPE_LVALUE_REFERENCE);
RECORD(TYPE_RVALUE_REFERENCE);
RECORD(TYPE_MEMBER_POINTER);
RECORD(TYPE_CONSTANT_ARRAY);
RECORD(TYPE_INCOMPLETE_ARRAY);
RECORD(TYPE_VARIABLE_ARRAY);
RECORD(TYPE_VECTOR);
RECORD(TYPE_EXT_VECTOR);
RECORD(TYPE_FUNCTION_NO_PROTO);
RECORD(TYPE_FUNCTION_PROTO);
RECORD(TYPE_TYPEDEF);
RECORD(TYPE_TYPEOF_EXPR);
RECORD(TYPE_TYPEOF);
RECORD(TYPE_RECORD);
RECORD(TYPE_ENUM);
RECORD(TYPE_OBJC_INTERFACE);
RECORD(TYPE_OBJC_OBJECT_POINTER);
RECORD(TYPE_DECLTYPE);
RECORD(TYPE_ELABORATED);
RECORD(TYPE_SUBST_TEMPLATE_TYPE_PARM);
RECORD(TYPE_UNRESOLVED_USING);
RECORD(TYPE_INJECTED_CLASS_NAME);
RECORD(TYPE_OBJC_OBJECT);
RECORD(TYPE_TEMPLATE_TYPE_PARM);
RECORD(TYPE_TEMPLATE_SPECIALIZATION);
RECORD(TYPE_DEPENDENT_NAME);
RECORD(TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION);
RECORD(TYPE_DEPENDENT_SIZED_ARRAY);
RECORD(TYPE_PAREN);
RECORD(TYPE_PACK_EXPANSION);
RECORD(TYPE_ATTRIBUTED);
RECORD(TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK);
RECORD(TYPE_AUTO);
RECORD(TYPE_UNARY_TRANSFORM);
RECORD(TYPE_ATOMIC);
RECORD(TYPE_DECAYED);
RECORD(TYPE_ADJUSTED);
RECORD(TYPE_OBJC_TYPE_PARAM);
RECORD(LOCAL_REDECLARATIONS);
RECORD(DECL_TYPEDEF);
RECORD(DECL_TYPEALIAS);
RECORD(DECL_ENUM);
RECORD(DECL_RECORD);
RECORD(DECL_ENUM_CONSTANT);
RECORD(DECL_FUNCTION);
RECORD(DECL_OBJC_METHOD);
RECORD(DECL_OBJC_INTERFACE);
RECORD(DECL_OBJC_PROTOCOL);
RECORD(DECL_OBJC_IVAR);
RECORD(DECL_OBJC_AT_DEFS_FIELD);
RECORD(DECL_OBJC_CATEGORY);
RECORD(DECL_OBJC_CATEGORY_IMPL);
RECORD(DECL_OBJC_IMPLEMENTATION);
RECORD(DECL_OBJC_COMPATIBLE_ALIAS);
RECORD(DECL_OBJC_PROPERTY);
RECORD(DECL_OBJC_PROPERTY_IMPL);
RECORD(DECL_FIELD);
RECORD(DECL_MS_PROPERTY);
RECORD(DECL_VAR);
RECORD(DECL_IMPLICIT_PARAM);
RECORD(DECL_PARM_VAR);
RECORD(DECL_FILE_SCOPE_ASM);
RECORD(DECL_BLOCK);
RECORD(DECL_CONTEXT_LEXICAL);
RECORD(DECL_CONTEXT_VISIBLE);
RECORD(DECL_NAMESPACE);
RECORD(DECL_NAMESPACE_ALIAS);
RECORD(DECL_USING);
RECORD(DECL_USING_SHADOW);
RECORD(DECL_USING_DIRECTIVE);
RECORD(DECL_UNRESOLVED_USING_VALUE);
RECORD(DECL_UNRESOLVED_USING_TYPENAME);
RECORD(DECL_LINKAGE_SPEC);
RECORD(DECL_CXX_RECORD);
RECORD(DECL_CXX_METHOD);
RECORD(DECL_CXX_CONSTRUCTOR);
RECORD(DECL_CXX_INHERITED_CONSTRUCTOR);
RECORD(DECL_CXX_DESTRUCTOR);
RECORD(DECL_CXX_CONVERSION);
RECORD(DECL_ACCESS_SPEC);
RECORD(DECL_FRIEND);
RECORD(DECL_FRIEND_TEMPLATE);
RECORD(DECL_CLASS_TEMPLATE);
RECORD(DECL_CLASS_TEMPLATE_SPECIALIZATION);
RECORD(DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION);
RECORD(DECL_VAR_TEMPLATE);
RECORD(DECL_VAR_TEMPLATE_SPECIALIZATION);
RECORD(DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION);
RECORD(DECL_FUNCTION_TEMPLATE);
RECORD(DECL_TEMPLATE_TYPE_PARM);
RECORD(DECL_NON_TYPE_TEMPLATE_PARM);
RECORD(DECL_TEMPLATE_TEMPLATE_PARM);
RECORD(DECL_TYPE_ALIAS_TEMPLATE);
RECORD(DECL_STATIC_ASSERT);
RECORD(DECL_CXX_BASE_SPECIFIERS);
RECORD(DECL_CXX_CTOR_INITIALIZERS);
RECORD(DECL_INDIRECTFIELD);
RECORD(DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK);
RECORD(DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK);
RECORD(DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION);
RECORD(DECL_IMPORT);
RECORD(DECL_OMP_THREADPRIVATE);
RECORD(DECL_EMPTY);
RECORD(DECL_OBJC_TYPE_PARAM);
RECORD(DECL_OMP_CAPTUREDEXPR);
RECORD(DECL_PRAGMA_COMMENT);
RECORD(DECL_PRAGMA_DETECT_MISMATCH);
RECORD(DECL_OMP_DECLARE_REDUCTION);
// Statements and Exprs can occur in the Decls and Types block.
AddStmtsExprs(Stream, Record);
BLOCK(PREPROCESSOR_DETAIL_BLOCK);
RECORD(PPD_MACRO_EXPANSION);
RECORD(PPD_MACRO_DEFINITION);
RECORD(PPD_INCLUSION_DIRECTIVE);
// Extension block.
BLOCK(EXTENSION_BLOCK);
RECORD(EXTENSION_METADATA);
BLOCK(UNHASHED_CONTROL_BLOCK);
RECORD(SIGNATURE);
RECORD(DIAGNOSTIC_OPTIONS);
RECORD(DIAG_PRAGMA_MAPPINGS);
#undef RECORD
#undef BLOCK
Stream.ExitBlock();
}
/// \brief Prepares a path for being written to an AST file by converting it
/// to an absolute path and removing nested './'s.
///
/// \return \c true if the path was changed.
static bool cleanPathForOutput(FileManager &FileMgr,
SmallVectorImpl<char> &Path) {
bool Changed = FileMgr.makeAbsolutePath(Path);
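// Note: bitwise '|' rather than '||' is deliberate below, so that
// remove_dots() runs even when makeAbsolutePath() already changed the path.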
return Changed | llvm::sys::path::remove_dots(Path);
}
/// \brief Adjusts the given filename to only write out the portion of the
/// filename that is not part of the system root directory.
///
/// \param Filename the file name to adjust.
///
/// \param BaseDir When non-NULL, the PCH file is a relocatable AST file and
/// the returned filename will be adjusted by this root directory.
///
/// \returns either the original filename (if it needs no adjustment) or the
/// adjusted filename (which points into the @p Filename parameter).
static const char *
adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) {
assert(Filename && "No file name to adjust?");
if (BaseDir.empty())
return Filename;
// Verify that the filename and the system root have the same prefix.
unsigned Pos = 0;
for (; Filename[Pos] && Pos < BaseDir.size(); ++Pos)
if (Filename[Pos] != BaseDir[Pos])
return Filename; // Prefixes don't match.
// We hit the end of the filename before we hit the end of the system root.
if (!Filename[Pos])
return Filename;
// If there's no path separator either at the end of the base directory or
// immediately after it, then this path isn't within the base directory.
if (!llvm::sys::path::is_separator(Filename[Pos])) {
if (!llvm::sys::path::is_separator(BaseDir.back()))
return Filename;
} else {
// If the file name has a '/' at the current position, skip over the '/'.
// We distinguish relative paths from absolute paths by the
// absence of '/' at the beginning of relative paths.
//
// FIXME: This is wrong. We distinguish them by asking if the path is
// absolute, which isn't the same thing. And there might be multiple '/'s
// in a row. Use a better mechanism to indicate whether we have emitted an
// absolute or relative path.
++Pos;
}
return Filename + Pos;
}
ASTFileSignature ASTWriter::createSignature(StringRef Bytes) {
// Calculate the hash up to the start of UNHASHED_CONTROL_BLOCK.
llvm::SHA1 Hasher;
Hasher.update(ArrayRef<uint8_t>(Bytes.bytes_begin(), Bytes.size()));
auto Hash = Hasher.result();
// Convert to an array [5*i32].
ASTFileSignature Signature;
auto LShift = [&](unsigned char Val, unsigned Shift) {
return (uint32_t)Val << Shift;
};
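// SHA1 yields a 20-byte digest; each group of four bytes is packed
// big-endian into one 32-bit word, e.g. digest bytes {0x12, 0x34, 0x56, 0x78}
// become the word 0x12345678.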
for (int I = 0; I != 5; ++I)
Signature[I] = LShift(Hash[I * 4 + 0], 24) | LShift(Hash[I * 4 + 1], 16) |
LShift(Hash[I * 4 + 2], 8) | LShift(Hash[I * 4 + 3], 0);
return Signature;
}
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
ASTContext &Context) {
// Flush first to prepare the PCM hash (signature).
Stream.FlushToWord();
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
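// FlushToWord() above aligned the stream, so GetCurrentBitNo() is a multiple
// of 8; shifting right by 3 converts the bit position into a byte offset.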
// Enter the block and prepare to write records.
RecordData Record;
Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
// For implicit modules, write the hash of the PCM as its signature.
ASTFileSignature Signature;
if (WritingModule &&
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl));
Record.append(Signature.begin(), Signature.end());
Stream.EmitRecord(SIGNATURE, Record);
Record.clear();
}
// Diagnostic options.
const auto &Diags = Context.getDiagnostics();
const DiagnosticOptions &DiagOpts = Diags.getDiagnosticOptions();
#define DIAGOPT(Name, Bits, Default) Record.push_back(DiagOpts.Name);
#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
Record.push_back(static_cast<unsigned>(DiagOpts.get##Name()));
#include "clang/Basic/DiagnosticOptions.def"
Record.push_back(DiagOpts.Warnings.size());
for (unsigned I = 0, N = DiagOpts.Warnings.size(); I != N; ++I)
AddString(DiagOpts.Warnings[I], Record);
Record.push_back(DiagOpts.Remarks.size());
for (unsigned I = 0, N = DiagOpts.Remarks.size(); I != N; ++I)
AddString(DiagOpts.Remarks[I], Record);
// Note: we don't serialize the log or serialization file names, because they
// are generally transient files and will almost always be overridden.
Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record);
// Write out the diagnostic/pragma mappings.
WritePragmaDiagnosticMappings(Diags, /* IsModule = */ WritingModule);
// Leave the options block.
Stream.ExitBlock();
return Signature;
}
/// \brief Write the control block.
void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
StringRef isysroot,
const std::string &OutputFile) {
using namespace llvm;
Stream.EnterSubblock(CONTROL_BLOCK_ID, 5);
RecordData Record;
// Metadata
auto MetadataAbbrev = std::make_shared<BitCodeAbbrev>();
MetadataAbbrev->Add(BitCodeAbbrevOp(METADATA));
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Major
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Minor
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Clang maj.
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // Clang min.
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Relocatable
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Timestamps
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Errors
MetadataAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // SVN branch/tag
unsigned MetadataAbbrevCode = Stream.EmitAbbrev(std::move(MetadataAbbrev));
assert((!WritingModule || isysroot.empty()) &&
"writing module as a relocatable PCH?");
{
RecordData::value_type Record[] = {METADATA, VERSION_MAJOR, VERSION_MINOR,
CLANG_VERSION_MAJOR, CLANG_VERSION_MINOR,
!isysroot.empty(), IncludeTimestamps,
ASTHasCompilerErrors};
Stream.EmitRecordWithBlob(MetadataAbbrevCode, Record,
getClangFullRepositoryVersion());
}
if (WritingModule) {
// Module name
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_NAME));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {MODULE_NAME};
Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name);
}
if (WritingModule && WritingModule->Directory) {
SmallString<128> BaseDir(WritingModule->Directory->getName());
cleanPathForOutput(Context.getSourceManager().getFileManager(), BaseDir);
// If the home of the module is the current working directory, then we
// want to pick up the cwd of the build process loading the module, not
// our cwd, when we load this module.
if (!PP.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModuleMapFileHomeIsCwd ||
WritingModule->Directory->getName() != StringRef(".")) {
// Module directory.
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {MODULE_DIRECTORY};
Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir);
}
// Write out all other paths relative to the base directory if possible.
BaseDirectory.assign(BaseDir.begin(), BaseDir.end());
} else if (!isysroot.empty()) {
// Write out paths relative to the sysroot if possible.
BaseDirectory = isysroot;
}
// Module map file
if (WritingModule && WritingModule->Kind == Module::ModuleMapModule) {
Record.clear();
auto &Map = PP.getHeaderSearchInfo().getModuleMap();
AddPath(WritingModule->PresumedModuleMapFile.empty()
? Map.getModuleMapFileForUniquing(WritingModule)->getName()
: StringRef(WritingModule->PresumedModuleMapFile),
Record);
// Additional module map files.
if (auto *AdditionalModMaps =
Map.getAdditionalModuleMapFiles(WritingModule)) {
Record.push_back(AdditionalModMaps->size());
for (const FileEntry *F : *AdditionalModMaps)
AddPath(F->getName(), Record);
} else {
Record.push_back(0);
}
Stream.EmitRecord(MODULE_MAP_FILE, Record);
}
// Imports
if (Chain) {
serialization::ModuleManager &Mgr = Chain->getModuleManager();
Record.clear();
for (ModuleFile &M : Mgr) {
// Skip modules that weren't directly imported.
if (!M.isDirectlyImported())
continue;
Record.push_back((unsigned)M.Kind); // FIXME: Stable encoding
AddSourceLocation(M.ImportLoc, Record);
// If we have a calculated signature, there is no need to store
// the size or timestamp.
Record.push_back(M.Signature ? 0 : M.File->getSize());
Record.push_back(M.Signature ? 0 : getTimestampForOutput(M.File));
for (auto I : M.Signature)
Record.push_back(I);
AddPath(M.FileName, Record);
}
Stream.EmitRecord(IMPORTS, Record);
}
// Write the options block.
Stream.EnterSubblock(OPTIONS_BLOCK_ID, 4);
// Language options.
Record.clear();
const LangOptions &LangOpts = Context.getLangOpts();
#define LANGOPT(Name, Bits, Default, Description) \
Record.push_back(LangOpts.Name);
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
Record.push_back(static_cast<unsigned>(LangOpts.get##Name()));
#include "clang/Basic/LangOptions.def"
#define SANITIZER(NAME, ID) \
Record.push_back(LangOpts.Sanitize.has(SanitizerKind::ID));
#include "clang/Basic/Sanitizers.def"
Record.push_back(LangOpts.ModuleFeatures.size());
for (StringRef Feature : LangOpts.ModuleFeatures)
AddString(Feature, Record);
Record.push_back((unsigned) LangOpts.ObjCRuntime.getKind());
AddVersionTuple(LangOpts.ObjCRuntime.getVersion(), Record);
AddString(LangOpts.CurrentModule, Record);
// Comment options.
Record.push_back(LangOpts.CommentOpts.BlockCommandNames.size());
for (const auto &I : LangOpts.CommentOpts.BlockCommandNames) {
AddString(I, Record);
}
Record.push_back(LangOpts.CommentOpts.ParseAllComments);
// OpenMP offloading options.
Record.push_back(LangOpts.OMPTargetTriples.size());
for (auto &T : LangOpts.OMPTargetTriples)
AddString(T.getTriple(), Record);
AddString(LangOpts.OMPHostIRFile, Record);
Stream.EmitRecord(LANGUAGE_OPTIONS, Record);
// Target options.
Record.clear();
const TargetInfo &Target = Context.getTargetInfo();
const TargetOptions &TargetOpts = Target.getTargetOpts();
AddString(TargetOpts.Triple, Record);
AddString(TargetOpts.CPU, Record);
AddString(TargetOpts.ABI, Record);
Record.push_back(TargetOpts.FeaturesAsWritten.size());
for (unsigned I = 0, N = TargetOpts.FeaturesAsWritten.size(); I != N; ++I) {
AddString(TargetOpts.FeaturesAsWritten[I], Record);
}
Record.push_back(TargetOpts.Features.size());
for (unsigned I = 0, N = TargetOpts.Features.size(); I != N; ++I) {
AddString(TargetOpts.Features[I], Record);
}
Stream.EmitRecord(TARGET_OPTIONS, Record);
// File system options.
Record.clear();
const FileSystemOptions &FSOpts =
Context.getSourceManager().getFileManager().getFileSystemOpts();
AddString(FSOpts.WorkingDir, Record);
Stream.EmitRecord(FILE_SYSTEM_OPTIONS, Record);
// Header search options.
Record.clear();
const HeaderSearchOptions &HSOpts
= PP.getHeaderSearchInfo().getHeaderSearchOpts();
AddString(HSOpts.Sysroot, Record);
// Include entries.
Record.push_back(HSOpts.UserEntries.size());
for (unsigned I = 0, N = HSOpts.UserEntries.size(); I != N; ++I) {
const HeaderSearchOptions::Entry &Entry = HSOpts.UserEntries[I];
AddString(Entry.Path, Record);
Record.push_back(static_cast<unsigned>(Entry.Group));
Record.push_back(Entry.IsFramework);
Record.push_back(Entry.IgnoreSysRoot);
}
// System header prefixes.
Record.push_back(HSOpts.SystemHeaderPrefixes.size());
for (unsigned I = 0, N = HSOpts.SystemHeaderPrefixes.size(); I != N; ++I) {
AddString(HSOpts.SystemHeaderPrefixes[I].Prefix, Record);
Record.push_back(HSOpts.SystemHeaderPrefixes[I].IsSystemHeader);
}
AddString(HSOpts.ResourceDir, Record);
AddString(HSOpts.ModuleCachePath, Record);
AddString(HSOpts.ModuleUserBuildPath, Record);
Record.push_back(HSOpts.DisableModuleHash);
Record.push_back(HSOpts.ImplicitModuleMaps);
Record.push_back(HSOpts.ModuleMapFileHomeIsCwd);
Record.push_back(HSOpts.UseBuiltinIncludes);
Record.push_back(HSOpts.UseStandardSystemIncludes);
Record.push_back(HSOpts.UseStandardCXXIncludes);
Record.push_back(HSOpts.UseLibcxx);
// Write out the specific module cache path that contains the module files.
AddString(PP.getHeaderSearchInfo().getModuleCachePath(), Record);
Stream.EmitRecord(HEADER_SEARCH_OPTIONS, Record);
// Preprocessor options.
Record.clear();
const PreprocessorOptions &PPOpts = PP.getPreprocessorOpts();
// Macro definitions.
Record.push_back(PPOpts.Macros.size());
for (unsigned I = 0, N = PPOpts.Macros.size(); I != N; ++I) {
AddString(PPOpts.Macros[I].first, Record);
Record.push_back(PPOpts.Macros[I].second);
}
// Includes
Record.push_back(PPOpts.Includes.size());
for (unsigned I = 0, N = PPOpts.Includes.size(); I != N; ++I)
AddString(PPOpts.Includes[I], Record);
// Macro includes
Record.push_back(PPOpts.MacroIncludes.size());
for (unsigned I = 0, N = PPOpts.MacroIncludes.size(); I != N; ++I)
AddString(PPOpts.MacroIncludes[I], Record);
Record.push_back(PPOpts.UsePredefines);
// Detailed record is important since it is used for the module cache hash.
Record.push_back(PPOpts.DetailedRecord);
AddString(PPOpts.ImplicitPCHInclude, Record);
AddString(PPOpts.ImplicitPTHInclude, Record);
Record.push_back(static_cast<unsigned>(PPOpts.ObjCXXARCStandardLibrary));
Stream.EmitRecord(PREPROCESSOR_OPTIONS, Record);
// Leave the options block.
Stream.ExitBlock();
// Original file name and file ID
SourceManager &SM = Context.getSourceManager();
if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
auto FileAbbrev = std::make_shared<BitCodeAbbrev>();
FileAbbrev->Add(BitCodeAbbrevOp(ORIGINAL_FILE));
FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File ID
FileAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned FileAbbrevCode = Stream.EmitAbbrev(std::move(FileAbbrev));
Record.clear();
Record.push_back(ORIGINAL_FILE);
Record.push_back(SM.getMainFileID().getOpaqueValue());
EmitRecordWithPath(FileAbbrevCode, Record, MainFile->getName());
}
Record.clear();
Record.push_back(SM.getMainFileID().getOpaqueValue());
Stream.EmitRecord(ORIGINAL_FILE_ID, Record);
// Original PCH directory
if (!OutputFile.empty() && OutputFile != "-") {
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(ORIGINAL_PCH_DIR));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
SmallString<128> OutputPath(OutputFile);
SM.getFileManager().makeAbsolutePath(OutputPath);
StringRef origDir = llvm::sys::path::parent_path(OutputPath);
RecordData::value_type Record[] = {ORIGINAL_PCH_DIR};
Stream.EmitRecordWithBlob(AbbrevCode, Record, origDir);
}
WriteInputFiles(Context.SourceMgr,
PP.getHeaderSearchInfo().getHeaderSearchOpts(),
PP.getLangOpts().Modules);
Stream.ExitBlock();
}
namespace {
/// \brief An input file.
struct InputFileEntry {
const FileEntry *File;
bool IsSystemFile;
bool IsTransient;
bool BufferOverridden;
bool IsTopLevelModuleMap;
};
} // end anonymous namespace
void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
HeaderSearchOptions &HSOpts,
bool Modules) {
using namespace llvm;
Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
// Create input-file abbreviation.
auto IFAbbrev = std::make_shared<BitCodeAbbrev>();
IFAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE));
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Size
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // Modification time
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Overridden
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Transient
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Module map
IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));
// Get all ContentCache objects for files, sorted by whether the file is a
// system one or not. System files go at the back, user files at the front.
std::deque<InputFileEntry> SortedFiles;
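// A deque allows O(1) push_front, so user files can be prepended ahead of
// the system files appended at the back without a separate sort pass.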
for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); I != N; ++I) {
// Get this source location entry.
const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I);
assert(&SourceMgr.getSLocEntry(FileID::get(I)) == SLoc);
// We only care about file entries that were not overridden.
if (!SLoc->isFile())
continue;
const SrcMgr::FileInfo &File = SLoc->getFile();
const SrcMgr::ContentCache *Cache = File.getContentCache();
if (!Cache->OrigEntry)
continue;
InputFileEntry Entry;
Entry.File = Cache->OrigEntry;
Entry.IsSystemFile = Cache->IsSystemFile;
Entry.IsTransient = Cache->IsTransient;
Entry.BufferOverridden = Cache->BufferOverridden;
Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
File.getIncludeLoc().isInvalid();
if (Cache->IsSystemFile)
SortedFiles.push_back(Entry);
else
SortedFiles.push_front(Entry);
}
unsigned UserFilesNum = 0;
// Write out all of the input files.
std::vector<uint64_t> InputFileOffsets;
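// Input file IDs are 1-based: a zero in InputFileIDs means the file has not
// been assigned an ID yet, and the first recorded file gets ID 1.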
for (const auto &Entry : SortedFiles) {
uint32_t &InputFileID = InputFileIDs[Entry.File];
if (InputFileID != 0)
continue; // already recorded this file.
// Record this entry's offset.
InputFileOffsets.push_back(Stream.GetCurrentBitNo());
InputFileID = InputFileOffsets.size();
if (!Entry.IsSystemFile)
++UserFilesNum;
// Emit size/modification time for this file.
// And whether this file was overridden.
RecordData::value_type Record[] = {
INPUT_FILE,
InputFileOffsets.size(),
(uint64_t)Entry.File->getSize(),
(uint64_t)getTimestampForOutput(Entry.File),
Entry.BufferOverridden,
Entry.IsTransient,
Entry.IsTopLevelModuleMap};
EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
}
Stream.ExitBlock();
// Create input file offsets abbreviation.
auto OffsetsAbbrev = std::make_shared<BitCodeAbbrev>();
OffsetsAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_OFFSETS));
OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # input files
OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # non-system
// input files
OffsetsAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Array
unsigned OffsetsAbbrevCode = Stream.EmitAbbrev(std::move(OffsetsAbbrev));
// Write input file offsets.
RecordData::value_type Record[] = {INPUT_FILE_OFFSETS,
InputFileOffsets.size(), UserFilesNum};
Stream.EmitRecordWithBlob(OffsetsAbbrevCode, Record, bytes(InputFileOffsets));
}
//===----------------------------------------------------------------------===//
// Source Manager Serialization
//===----------------------------------------------------------------------===//
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// file.
static unsigned CreateSLocFileAbbrev(llvm::BitstreamWriter &Stream) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_FILE_ENTRY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
// FileEntry fields.
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Input File ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumCreatedFIDs
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 24)); // FirstDeclIndex
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // NumDecls
return Stream.EmitAbbrev(std::move(Abbrev));
}
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer.
static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_ENTRY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Include location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Characteristic
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Line directives
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Buffer name blob
return Stream.EmitAbbrev(std::move(Abbrev));
}
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer's blob.
static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
bool Compressed) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
: SM_SLOC_BUFFER_BLOB));
if (Compressed)
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
return Stream.EmitAbbrev(std::move(Abbrev));
}
/// \brief Create an abbreviation for the SLocEntry that refers to a macro
/// expansion.
static unsigned CreateSLocExpansionAbbrev(llvm::BitstreamWriter &Stream) {
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_EXPANSION_ENTRY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Offset
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Spelling location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Start location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // End location
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Token length
return Stream.EmitAbbrev(std::move(Abbrev));
}
namespace {
// Trait used for the on-disk hash table of header search information.
class HeaderFileInfoTrait {
ASTWriter &Writer;
// Keep track of the framework names we've used during serialization.
SmallVector<char, 128> FrameworkStringData;
llvm::StringMap<unsigned> FrameworkNameOffset;
public:
HeaderFileInfoTrait(ASTWriter &Writer) : Writer(Writer) {}
struct key_type {
StringRef Filename;
off_t Size;
time_t ModTime;
};
typedef const key_type &key_type_ref;
using UnresolvedModule =
llvm::PointerIntPair<Module *, 2, ModuleMap::ModuleHeaderRole>;
struct data_type {
const HeaderFileInfo &HFI;
ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
UnresolvedModule Unresolved;
};
typedef const data_type &data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
hash_value_type ComputeHash(key_type_ref key) {
// The hash is based only on the size/time of the file, so that the reader
// can match even when symlinking or excess path elements ("foo/../", "../")
// change the form of the name. However, the complete path is still the key.
return llvm::hash_combine(key.Size, key.ModTime);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
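// The key is the 8-byte size, the 8-byte modification time, and then the
// filename; the '+1' below covers the trailing NUL that EmitKey writes
// along with the name.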
unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
LE.write<uint16_t>(KeyLen);
unsigned DataLen = 1 + 2 + 4 + 4;
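// Fixed-size payload: 1 byte of flags, 2 bytes for NumIncludes, 4 bytes for
// the controlling macro, and 4 bytes for the framework-name offset; each
// module reference emitted in EmitData adds 4 more bytes.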
for (auto ModInfo : Data.KnownHeaders)
if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
DataLen += 4;
if (Data.Unresolved.getPointer())
DataLen += 4;
LE.write<uint8_t>(DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
LE.write<uint64_t>(key.Size);
KeyLen -= 8;
LE.write<uint64_t>(key.ModTime);
KeyLen -= 8;
Out.write(key.Filename.data(), KeyLen);
}
void EmitData(raw_ostream &Out, key_type_ref key,
data_type_ref Data, unsigned DataLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell(); (void)Start;
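// Bit layout of the flags byte: isImport in bit 5, isPragmaOnce in bit 4,
// DirInfo starting at bit 1, and IndexHeaderMapHeader in bit 0.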
unsigned char Flags = (Data.HFI.isImport << 5)
| (Data.HFI.isPragmaOnce << 4)
| (Data.HFI.DirInfo << 1)
| Data.HFI.IndexHeaderMapHeader;
LE.write<uint8_t>(Flags);
LE.write<uint16_t>(Data.HFI.NumIncludes);
if (!Data.HFI.ControllingMacro)
LE.write<uint32_t>(Data.HFI.ControllingMacroID);
else
LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro));
unsigned Offset = 0;
if (!Data.HFI.Framework.empty()) {
// If this header refers into a framework, save the framework name.
llvm::StringMap<unsigned>::iterator Pos
= FrameworkNameOffset.find(Data.HFI.Framework);
if (Pos == FrameworkNameOffset.end()) {
Offset = FrameworkStringData.size() + 1;
FrameworkStringData.append(Data.HFI.Framework.begin(),
Data.HFI.Framework.end());
FrameworkStringData.push_back(0);
FrameworkNameOffset[Data.HFI.Framework] = Offset;
} else
Offset = Pos->second;
}
LE.write<uint32_t>(Offset);
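// Encode each module reference as (submodule ID << 2) | role, keeping the
// ModuleMap::ModuleHeaderRole in the two low bits.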
auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
uint32_t Value = (ModID << 2) | (unsigned)Role;
assert((Value >> 2) == ModID && "overflow in header module info");
LE.write<uint32_t>(Value);
}
};
// FIXME: If the header is excluded, we should write out some
// record of that fact.
for (auto ModInfo : Data.KnownHeaders)
EmitModule(ModInfo.getModule(), ModInfo.getRole());
if (Data.Unresolved.getPointer())
EmitModule(Data.Unresolved.getPointer(), Data.Unresolved.getInt());
assert(Out.tell() - Start == DataLen && "Wrong data length");
}
const char *strings_begin() const { return FrameworkStringData.begin(); }
const char *strings_end() const { return FrameworkStringData.end(); }
};
} // end anonymous namespace
/// \brief Write the header search block for the list of files known to the
/// header search subsystem.
///
/// \param HS The header search structure to save.
void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
HeaderFileInfoTrait GeneratorTrait(*this);
llvm::OnDiskChainedHashTableGenerator<HeaderFileInfoTrait> Generator;
SmallVector<const char *, 4> SavedStrings;
unsigned NumHeaderSearchEntries = 0;
// Find all unresolved headers for the current module. We generally will
// have resolved them before we get here, but not necessarily: we might be
// compiling a preprocessed module, where there is no requirement for the
// original files to exist any more.
const HeaderFileInfo Empty; // So we can take a reference.
if (WritingModule) {
llvm::SmallVector<Module *, 16> Worklist(1, WritingModule);
while (!Worklist.empty()) {
Module *M = Worklist.pop_back_val();
if (!M->isAvailable())
continue;
// Map to disk files where possible, to pick up any missing stat
// information. This also means we don't need to check the unresolved
// headers list when emitting resolved headers in the first loop below.
// FIXME: It'd be preferable to avoid doing this if we were given
// sufficient stat information in the module map.
HS.getModuleMap().resolveHeaderDirectives(M);
// If the file didn't exist, we can still create a module if we were given
// enough information in the module map.
for (auto U : M->MissingHeaders) {
// Check that we were given enough information to build a module
// without this file existing on disk.
if (!U.Size || (!U.ModTime && IncludeTimestamps)) {
PP->Diag(U.FileNameLoc, diag::err_module_no_size_mtime_for_header)
<< WritingModule->getFullModuleName() << U.Size.hasValue()
<< U.FileName;
continue;
}
// Form the effective relative pathname for the file.
SmallString<128> Filename(M->Directory->getName());
llvm::sys::path::append(Filename, U.FileName);
PreparePathForOutput(Filename);
StringRef FilenameDup = strdup(Filename.c_str());
SavedStrings.push_back(FilenameDup.data());
HeaderFileInfoTrait::key_type Key = {
FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0
};
HeaderFileInfoTrait::data_type Data = {
Empty, {}, {M, ModuleMap::headerKindToRole(U.Kind)}
};
// FIXME: Deal with cases where there are multiple unresolved header
// directives in different submodules for the same header.
Generator.insert(Key, Data, GeneratorTrait);
++NumHeaderSearchEntries;
}
Worklist.append(M->submodule_begin(), M->submodule_end());
}
}
SmallVector<const FileEntry *, 16> FilesByUID;
HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
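// HeaderSearch only keeps HeaderFileInfo for UIDs below header_file_size();
// drop any later files up front.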
if (FilesByUID.size() > HS.header_file_size())
FilesByUID.resize(HS.header_file_size());
for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
const FileEntry *File = FilesByUID[UID];
if (!File)
continue;
// Get the file info. This will load info from the external source if
// necessary. Skip emitting this file if we have no information on it
// as a header file (in which case HFI will be null) or if it hasn't
// changed since it was loaded. Also skip it if it's for a modular header
// from a different module; in that case, we rely on the module(s)
// containing the header to provide this information.
const HeaderFileInfo *HFI =
HS.getExistingFileInfo(File, /*WantExternal*/!Chain);
if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
continue;
// Massage the file path into an appropriate form.
StringRef Filename = File->getName();
SmallString<128> FilenameTmp(Filename);
if (PreparePathForOutput(FilenameTmp)) {
// If we performed any translation on the file name at all, we need to
// save this string, since the generator will refer to it later.
Filename = StringRef(strdup(FilenameTmp.c_str()));
SavedStrings.push_back(Filename.data());
}
HeaderFileInfoTrait::key_type Key = {
Filename, File->getSize(), getTimestampForOutput(File)
};
HeaderFileInfoTrait::data_type Data = {
*HFI, HS.getModuleMap().findAllModulesForHeader(File), {}
};
Generator.insert(Key, Data, GeneratorTrait);
++NumHeaderSearchEntries;
}
// Create the on-disk hash table in a buffer.
SmallString<4096> TableData;
uint32_t BucketOffset;
{
using namespace llvm::support;
llvm::raw_svector_ostream Out(TableData);
// Make sure that no bucket is at offset 0, so that a zero offset can act
// as an empty-bucket sentinel for the reader.
endian::Writer<little>(Out).write<uint32_t>(0);
BucketOffset = Generator.Emit(Out, GeneratorTrait);
}
// Create a blob abbreviation
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_TABLE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned TableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the header search table
RecordData::value_type Record[] = {HEADER_SEARCH_TABLE, BucketOffset,
NumHeaderSearchEntries, TableData.size()};
TableData.append(GeneratorTrait.strings_begin(),GeneratorTrait.strings_end());
Stream.EmitRecordWithBlob(TableAbbrev, Record, TableData);
// Free all of the strings we had to duplicate.
for (unsigned I = 0, N = SavedStrings.size(); I != N; ++I)
free(const_cast<char *>(SavedStrings[I]));
}
static void emitBlob(llvm::BitstreamWriter &Stream, StringRef Blob,
unsigned SLocBufferBlobCompressedAbbrv,
unsigned SLocBufferBlobAbbrv) {
typedef ASTWriter::RecordData::value_type RecordDataType;
// Compress the buffer if possible. We expect that almost all PCM
// consumers will not need the buffer's contents.
SmallString<0> CompressedBuffer;
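// The incoming Blob includes the trailing NUL the reader expects; compress
// everything but that final byte and record the uncompressed size so the
// reader can rebuild the buffer.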
if (llvm::zlib::isAvailable()) {
llvm::Error E = llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer);
if (!E) {
RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED,
Blob.size() - 1};
Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record,
CompressedBuffer);
return;
}
llvm::consumeError(std::move(E));
}
RecordDataType Record[] = {SM_SLOC_BUFFER_BLOB};
Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob);
}
/// \brief Writes the block containing the serialized form of the
/// source manager.
///
/// TODO: We should probably use an on-disk hash table (stored in a
/// blob), indexed based on the file name, so that we only create
/// entries for files that we actually need. In the common case (no
/// errors), we probably won't have to create file entries for any of
/// the files in the AST.
void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
const Preprocessor &PP) {
RecordData Record;
// Enter the source manager block.
Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
// Abbreviations for the various kinds of source-location entries.
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream);
unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false);
unsigned SLocBufferBlobCompressedAbbrv =
CreateSLocBufferBlobAbbrev(Stream, true);
unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream);
// Write out the source location entry table. We skip the first
// entry, which is always the same dummy entry.
std::vector<uint32_t> SLocEntryOffsets;
RecordData PreloadSLocs;
SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1);
for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size();
I != N; ++I) {
// Get this source location entry.
const SrcMgr::SLocEntry *SLoc = &SourceMgr.getLocalSLocEntry(I);
FileID FID = FileID::get(I);
assert(&SourceMgr.getSLocEntry(FID) == SLoc);
// Record the offset of this source-location entry.
SLocEntryOffsets.push_back(Stream.GetCurrentBitNo());
// Figure out which record code to use.
unsigned Code;
if (SLoc->isFile()) {
const SrcMgr::ContentCache *Cache = SLoc->getFile().getContentCache();
if (Cache->OrigEntry) {
Code = SM_SLOC_FILE_ENTRY;
} else
Code = SM_SLOC_BUFFER_ENTRY;
} else
Code = SM_SLOC_EXPANSION_ENTRY;
Record.clear();
Record.push_back(Code);
// Starting offset of this entry within this module, so skip the dummy.
Record.push_back(SLoc->getOffset() - 2);
if (SLoc->isFile()) {
const SrcMgr::FileInfo &File = SLoc->getFile();
AddSourceLocation(File.getIncludeLoc(), Record);
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
Record.push_back(File.hasLineDirectives());
const SrcMgr::ContentCache *Content = File.getContentCache();
bool EmitBlob = false;
if (Content->OrigEntry) {
assert(Content->OrigEntry == Content->ContentsEntry &&
"Writing to AST an overridden file is not supported");
// The source location entry is a file. Emit input file ID.
assert(InputFileIDs[Content->OrigEntry] != 0 && "Missed file entry");
Record.push_back(InputFileIDs[Content->OrigEntry]);
Record.push_back(File.NumCreatedFIDs);
FileDeclIDsTy::iterator FDI = FileDeclIDs.find(FID);
if (FDI != FileDeclIDs.end()) {
Record.push_back(FDI->second->FirstDeclIndex);
Record.push_back(FDI->second->DeclIDs.size());
} else {
Record.push_back(0);
Record.push_back(0);
}
Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
if (Content->BufferOverridden || Content->IsTransient)
EmitBlob = true;
} else {
// The source location entry is a buffer. The blob associated
// with this entry contains the contents of the buffer.
// We add one to the size so that we capture the trailing NULL
// that is required by llvm::MemoryBuffer::getMemBuffer (on
// the reader side).
const llvm::MemoryBuffer *Buffer
= Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
StringRef Name = Buffer->getBufferIdentifier();
Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
StringRef(Name.data(), Name.size() + 1));
EmitBlob = true;
if (Name == "<built-in>")
PreloadSLocs.push_back(SLocEntryOffsets.size());
}
if (EmitBlob) {
// Include the implicit terminating null character in the on-disk buffer
// if we're writing it uncompressed.
const llvm::MemoryBuffer *Buffer =
Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1);
emitBlob(Stream, Blob, SLocBufferBlobCompressedAbbrv,
SLocBufferBlobAbbrv);
}
} else {
// The source location entry is a macro expansion.
const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion();
AddSourceLocation(Expansion.getSpellingLoc(), Record);
AddSourceLocation(Expansion.getExpansionLocStart(), Record);
AddSourceLocation(Expansion.isMacroArgExpansion()
? SourceLocation()
: Expansion.getExpansionLocEnd(),
Record);
// Compute the token length for this macro expansion.
unsigned NextOffset = SourceMgr.getNextLocalOffset();
if (I + 1 != N)
NextOffset = SourceMgr.getLocalSLocEntry(I + 1).getOffset();
Record.push_back(NextOffset - SLoc->getOffset() - 1);
Stream.EmitRecordWithAbbrev(SLocExpansionAbbrv, Record);
}
}
Stream.ExitBlock();
if (SLocEntryOffsets.empty())
return;
// Write the source-location offsets table into the AST block. This
// table is used for lazily loading source-location information.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets
unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {
SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(),
SourceMgr.getNextLocalOffset() - 1 /* skip dummy */};
Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record,
bytes(SLocEntryOffsets));
}
// Write the source location entry preloads array, telling the AST
// reader which source locations entries it should load eagerly.
Stream.EmitRecord(SOURCE_LOCATION_PRELOADS, PreloadSLocs);
// Write the line table. It depends on remapping working, so it must come
// after the source location offsets.
if (SourceMgr.hasLineTable()) {
LineTableInfo &LineTable = SourceMgr.getLineTable();
Record.clear();
// Emit the needed file names.
llvm::DenseMap<int, int> FilenameMap;
for (const auto &L : LineTable) {
if (L.first.ID < 0)
continue;
for (auto &LE : L.second) {
if (FilenameMap.insert(std::make_pair(LE.FilenameID,
FilenameMap.size())).second)
AddPath(LineTable.getFilename(LE.FilenameID), Record);
}
}
Record.push_back(0);
// Emit the line entries
for (const auto &L : LineTable) {
// Only emit entries for local files.
if (L.first.ID < 0)
continue;
// Emit the file ID
Record.push_back(L.first.ID);
// Emit the line entries
Record.push_back(L.second.size());
for (const auto &LE : L.second) {
Record.push_back(LE.FileOffset);
Record.push_back(LE.LineNo);
Record.push_back(FilenameMap[LE.FilenameID]);
Record.push_back((unsigned)LE.FileKind);
Record.push_back(LE.IncludeOffset);
}
}
Stream.EmitRecord(SOURCE_MANAGER_LINE_TABLE, Record);
}
}
//===----------------------------------------------------------------------===//
// Preprocessor Serialization
//===----------------------------------------------------------------------===//
static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
const Preprocessor &PP) {
if (MacroInfo *MI = MD->getMacroInfo())
if (MI->isBuiltinMacro())
return true;
if (IsModule) {
SourceLocation Loc = MD->getLocation();
if (Loc.isInvalid())
return true;
if (PP.getSourceManager().getFileID(Loc) == PP.getPredefinesFileID())
return true;
}
return false;
}
/// \brief Writes the block containing the serialized form of the
/// preprocessor.
///
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
PreprocessingRecord *PPRec = PP.getPreprocessingRecord();
if (PPRec)
WritePreprocessorDetail(*PPRec);
RecordData Record;
RecordData ModuleMacroRecord;
// If the preprocessor __COUNTER__ value has been bumped, remember it.
if (PP.getCounterValue() != 0) {
RecordData::value_type Record[] = {PP.getCounterValue()};
Stream.EmitRecord(PP_COUNTER_VALUE, Record);
}
if (PP.isRecordingPreamble() && PP.hasRecordedPreamble()) {
assert(!IsModule);
for (const auto &Cond : PP.getPreambleConditionalStack()) {
AddSourceLocation(Cond.IfLoc, Record);
Record.push_back(Cond.WasSkipping);
Record.push_back(Cond.FoundNonSkip);
Record.push_back(Cond.FoundElse);
}
Stream.EmitRecord(PP_CONDITIONAL_STACK, Record);
Record.clear();
}
// Enter the preprocessor block.
Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);
// If the AST file contains __DATE__ or __TIME__, emit a warning about this.
// FIXME: Include a location for the use, and say which one was used.
if (PP.SawDateOrTime())
PP.Diag(SourceLocation(), diag::warn_module_uses_date_time) << IsModule;
// Loop over all the macro directives that are live at the end of the file,
// emitting each to the PP section.
// Construct the list of identifiers with macro directives that need to be
// serialized.
SmallVector<const IdentifierInfo *, 128> MacroIdentifiers;
for (auto &Id : PP.getIdentifierTable())
if (Id.second->hadMacroDefinition() &&
(!Id.second->isFromAST() ||
Id.second->hasChangedSinceDeserialization()))
MacroIdentifiers.push_back(Id.second);
// Sort the set of macro definitions that need to be serialized by the
// name of the macro, to provide a stable ordering.
std::sort(MacroIdentifiers.begin(), MacroIdentifiers.end(),
llvm::less_ptr<IdentifierInfo>());
// Emit the macro directives as a list and associate the offset with the
// identifier they belong to.
for (const IdentifierInfo *Name : MacroIdentifiers) {
MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name);
auto StartOffset = Stream.GetCurrentBitNo();
// Emit the macro directives in reverse source order.
for (; MD; MD = MD->getPrevious()) {
// Once we hit an ignored macro, we're done: the rest of the chain
// will all be ignored macros.
if (shouldIgnoreMacro(MD, IsModule, PP))
break;
AddSourceLocation(MD->getLocation(), Record);
Record.push_back(MD->getKind());
if (auto *DefMD = dyn_cast<DefMacroDirective>(MD)) {
Record.push_back(getMacroRef(DefMD->getInfo(), Name));
} else if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
Record.push_back(VisMD->isPublic());
}
}
// Write out any exported module macros.
bool EmittedModuleMacros = false;
// We write out exported module macros for PCH as well.
auto Leafs = PP.getLeafModuleMacros(Name);
SmallVector<ModuleMacro*, 8> Worklist(Leafs.begin(), Leafs.end());
llvm::DenseMap<ModuleMacro*, unsigned> Visits;
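// Walk from the leaf macros toward the macros they override. Visits counts
// how many overriders of a macro have been emitted; a macro is enqueued
// only once all of them have been, so every macro is emitted after the
// macros that override it.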
while (!Worklist.empty()) {
auto *Macro = Worklist.pop_back_val();
// Emit a record indicating this submodule exports this macro.
ModuleMacroRecord.push_back(
getSubmoduleID(Macro->getOwningModule()));
ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name));
for (auto *M : Macro->overrides())
ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule()));
Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord);
ModuleMacroRecord.clear();
// Enqueue overridden macros once we've visited all their ancestors.
for (auto *M : Macro->overrides())
if (++Visits[M] == M->getNumOverridingMacros())
Worklist.push_back(M);
EmittedModuleMacros = true;
}
if (Record.empty() && !EmittedModuleMacros)
continue;
IdentMacroDirectivesOffsetMap[Name] = StartOffset;
Stream.EmitRecord(PP_MACRO_DIRECTIVE_HISTORY, Record);
Record.clear();
}
// Offsets of each of the macros into the bitstream, indexed by the local
// macro ID.
//
// For each identifier that is associated with a macro, this vector
// provides the offset into the bitstream where that macro is defined.
std::vector<uint32_t> MacroOffsets;
for (unsigned I = 0, N = MacroInfosToEmit.size(); I != N; ++I) {
const IdentifierInfo *Name = MacroInfosToEmit[I].Name;
MacroInfo *MI = MacroInfosToEmit[I].MI;
MacroID ID = MacroInfosToEmit[I].ID;
if (ID < FirstMacroID) {
assert(0 && "Loaded MacroInfo entered MacroInfosToEmit ?");
continue;
}
// Record the local offset of this macro.
unsigned Index = ID - FirstMacroID;
if (Index == MacroOffsets.size())
MacroOffsets.push_back(Stream.GetCurrentBitNo());
else {
if (Index > MacroOffsets.size())
MacroOffsets.resize(Index + 1);
MacroOffsets[Index] = Stream.GetCurrentBitNo();
}
AddIdentifierRef(Name, Record);
AddSourceLocation(MI->getDefinitionLoc(), Record);
AddSourceLocation(MI->getDefinitionEndLoc(), Record);
Record.push_back(MI->isUsed());
Record.push_back(MI->isUsedForHeaderGuard());
unsigned Code;
if (MI->isObjectLike()) {
Code = PP_MACRO_OBJECT_LIKE;
} else {
Code = PP_MACRO_FUNCTION_LIKE;
Record.push_back(MI->isC99Varargs());
Record.push_back(MI->isGNUVarargs());
Record.push_back(MI->hasCommaPasting());
Record.push_back(MI->getNumParams());
for (const IdentifierInfo *Param : MI->params())
AddIdentifierRef(Param, Record);
}
// If we have a detailed preprocessing record, record the macro definition
// ID that corresponds to this macro.
if (PPRec)
Record.push_back(MacroDefinitions[PPRec->findMacroDefinition(MI)]);
Stream.EmitRecord(Code, Record);
Record.clear();
// Emit the tokens array.
for (unsigned TokNo = 0, e = MI->getNumTokens(); TokNo != e; ++TokNo) {
// Note that we know that the preprocessor does not have any annotation
// tokens in it because they are created by the parser, and thus can't
// be in a macro definition.
const Token &Tok = MI->getReplacementToken(TokNo);
AddToken(Tok, Record);
Stream.EmitRecord(PP_TOKEN, Record);
Record.clear();
}
++NumMacros;
}
Stream.ExitBlock();
// Write the offsets table for macro IDs.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(),
FirstMacroID - NUM_PREDEF_MACRO_IDS};
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
}
}
void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) {
if (PPRec.local_begin() == PPRec.local_end())
return;
SmallVector<PPEntityOffset, 64> PreprocessedEntityOffsets;
// Enter the preprocessor block.
Stream.EnterSubblock(PREPROCESSOR_DETAIL_BLOCK_ID, 3);
// If the preprocessor has a preprocessing record, emit it.
unsigned NumPreprocessingRecords = 0;
using namespace llvm;
// Set up the abbreviation for inclusion directives.
unsigned InclusionAbbrev = 0;
{
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(PPD_INCLUSION_DIRECTIVE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // filename length
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // in quotes
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // kind
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // imported module
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
InclusionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
}
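// Local preprocessed-entity IDs are numbered after the predefined IDs and
// after every entity loaded from imported AST files, keeping IDs unique
// across the whole module chain.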
unsigned FirstPreprocessorEntityID
= (Chain ? PPRec.getNumLoadedPreprocessedEntities() : 0)
+ NUM_PREDEF_PP_ENTITY_IDS;
unsigned NextPreprocessorEntityID = FirstPreprocessorEntityID;
RecordData Record;
for (PreprocessingRecord::iterator E = PPRec.local_begin(),
EEnd = PPRec.local_end();
E != EEnd;
(void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) {
Record.clear();
PreprocessedEntityOffsets.push_back(
PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo()));
if (auto *MD = dyn_cast<MacroDefinitionRecord>(*E)) {
// Record this macro definition's ID.
MacroDefinitions[MD] = NextPreprocessorEntityID;
AddIdentifierRef(MD->getName(), Record);
Stream.EmitRecord(PPD_MACRO_DEFINITION, Record);
continue;
}
if (auto *ME = dyn_cast<MacroExpansion>(*E)) {
Record.push_back(ME->isBuiltinMacro());
if (ME->isBuiltinMacro())
AddIdentifierRef(ME->getName(), Record);
else
Record.push_back(MacroDefinitions[ME->getDefinition()]);
Stream.EmitRecord(PPD_MACRO_EXPANSION, Record);
continue;
}
if (auto *ID = dyn_cast<InclusionDirective>(*E)) {
Record.push_back(PPD_INCLUSION_DIRECTIVE);
Record.push_back(ID->getFileName().size());
Record.push_back(ID->wasInQuotes());
Record.push_back(static_cast<unsigned>(ID->getKind()));
Record.push_back(ID->importedModule());
SmallString<64> Buffer;
Buffer += ID->getFileName();
// The FileEntry may be null if the inclusion was not resolved; we still
// create a PCH even with compiler errors, so check it before use.
if (ID->getFile())
Buffer += ID->getFile()->getName();
Stream.EmitRecordWithBlob(InclusionAbbrev, Record, Buffer);
continue;
}
llvm_unreachable("Unhandled PreprocessedEntity in ASTWriter");
}
Stream.ExitBlock();
// Write the offsets table for the preprocessing record.
if (NumPreprocessingRecords > 0) {
assert(PreprocessedEntityOffsets.size() == NumPreprocessingRecords);
// Create the abbreviation for the PPD_ENTITIES_OFFSETS record.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(PPD_ENTITIES_OFFSETS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first pp entity
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned PPEOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {PPD_ENTITIES_OFFSETS,
FirstPreprocessorEntityID -
NUM_PREDEF_PP_ENTITY_IDS};
Stream.EmitRecordWithBlob(PPEOffsetAbbrev, Record,
bytes(PreprocessedEntityOffsets));
}
}
unsigned ASTWriter::getLocalOrImportedSubmoduleID(Module *Mod) {
if (!Mod)
return 0;
llvm::DenseMap<Module *, unsigned>::iterator Known = SubmoduleIDs.find(Mod);
if (Known != SubmoduleIDs.end())
return Known->second;
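// A fresh local ID is handed out only when Mod belongs to the module being
// written, or (when not compiling a PCH) to the top-level module named by
// LangOpts.CurrentModule; anything else that was not imported gets 0.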
auto *Top = Mod->getTopLevelModule();
if (Top != WritingModule &&
(getLangOpts().CompilingPCH ||
!Top->fullModuleNameIs(StringRef(getLangOpts().CurrentModule))))
return 0;
return SubmoduleIDs[Mod] = NextSubmoduleID++;
}
unsigned ASTWriter::getSubmoduleID(Module *Mod) {
// FIXME: This can easily happen, if we have a reference to a submodule that
// did not result in us loading a module file for that submodule. For
// instance, a cross-top-level-module 'conflict' declaration will hit this.
unsigned ID = getLocalOrImportedSubmoduleID(Mod);
assert((ID || !Mod) &&
"asked for module ID for non-local, non-imported module");
return ID;
}
/// \brief Compute the number of modules within the given tree (including the
/// given module).
static unsigned getNumberOfModules(Module *Mod) {
unsigned ChildModules = 0;
for (auto Sub = Mod->submodule_begin(), SubEnd = Mod->submodule_end();
Sub != SubEnd; ++Sub)
ChildModules += getNumberOfModules(*Sub);
return ChildModules + 1;
}
void ASTWriter::WriteSubmodules(Module *WritingModule) {
// Enter the submodule description block.
Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
// Write the abbreviations needed for the submodules block.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExternC
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferSubmodules...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExplicit...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // InferExportWild...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ConfigMacrosExh...
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned DefinitionAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned UmbrellaAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned HeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TOPHEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned TopHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA_DIR));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned UmbrellaDirAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_REQUIRES));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // State
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Feature
unsigned RequiresAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_EXCLUDED_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned ExcludedHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_TEXTUAL_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned TextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned PrivateHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_PRIVATE_TEXTUAL_HEADER));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned PrivateTextualHeaderAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_LINK_LIBRARY));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
unsigned LinkLibraryAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFIG_MACRO));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name
unsigned ConfigMacroAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CONFLICT));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Other module
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Message
unsigned ConflictAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the submodule metadata block.
RecordData::value_type Record[] = {
getNumberOfModules(WritingModule),
FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS,
(unsigned)WritingModule->Kind};
Stream.EmitRecord(SUBMODULE_METADATA, Record);
// Write all of the submodules.
std::queue<Module *> Q;
Q.push(WritingModule);
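// Breadth-first order guarantees a parent's SUBMODULE_DEFINITION is emitted
// (and its ID assigned) before any of its submodules, which the ParentID
// lookup below depends on.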
while (!Q.empty()) {
Module *Mod = Q.front();
Q.pop();
unsigned ID = getSubmoduleID(Mod);
uint64_t ParentID = 0;
if (Mod->Parent) {
assert(SubmoduleIDs[Mod->Parent] && "Submodule parent not written?");
ParentID = SubmoduleIDs[Mod->Parent];
}
// Emit the definition of the block.
{
RecordData::value_type Record[] = {SUBMODULE_DEFINITION,
ID,
ParentID,
Mod->IsFramework,
Mod->IsExplicit,
Mod->IsSystem,
Mod->IsExternC,
Mod->InferSubmodules,
Mod->InferExplicitSubmodules,
Mod->InferExportWildcard,
Mod->ConfigMacrosExhaustive};
Stream.EmitRecordWithBlob(DefinitionAbbrev, Record, Mod->Name);
}
// Emit the requirements.
for (const auto &R : Mod->Requirements) {
RecordData::value_type Record[] = {SUBMODULE_REQUIRES, R.second};
Stream.EmitRecordWithBlob(RequiresAbbrev, Record, R.first);
}
// Emit the umbrella header, if there is one.
if (auto UmbrellaHeader = Mod->getUmbrellaHeader()) {
RecordData::value_type Record[] = {SUBMODULE_UMBRELLA_HEADER};
Stream.EmitRecordWithBlob(UmbrellaAbbrev, Record,
UmbrellaHeader.NameAsWritten);
} else if (auto UmbrellaDir = Mod->getUmbrellaDir()) {
RecordData::value_type Record[] = {SUBMODULE_UMBRELLA_DIR};
Stream.EmitRecordWithBlob(UmbrellaDirAbbrev, Record,
UmbrellaDir.NameAsWritten);
}
// Emit the headers.
struct {
unsigned RecordKind;
unsigned Abbrev;
Module::HeaderKind HeaderKind;
} HeaderLists[] = {
{SUBMODULE_HEADER, HeaderAbbrev, Module::HK_Normal},
{SUBMODULE_TEXTUAL_HEADER, TextualHeaderAbbrev, Module::HK_Textual},
{SUBMODULE_PRIVATE_HEADER, PrivateHeaderAbbrev, Module::HK_Private},
{SUBMODULE_PRIVATE_TEXTUAL_HEADER, PrivateTextualHeaderAbbrev,
Module::HK_PrivateTextual},
{SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded}
};
for (auto &HL : HeaderLists) {
RecordData::value_type Record[] = {HL.RecordKind};
for (auto &H : Mod->Headers[HL.HeaderKind])
Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten);
}
// Emit the top headers.
{
auto TopHeaders = Mod->getTopHeaders(PP->getFileManager());
RecordData::value_type Record[] = {SUBMODULE_TOPHEADER};
for (auto *H : TopHeaders)
Stream.EmitRecordWithBlob(TopHeaderAbbrev, Record, H->getName());
}
// Emit the imports.
if (!Mod->Imports.empty()) {
RecordData Record;
for (auto *I : Mod->Imports)
Record.push_back(getSubmoduleID(I));
Stream.EmitRecord(SUBMODULE_IMPORTS, Record);
}
// Emit the exports.
if (!Mod->Exports.empty()) {
RecordData Record;
for (const auto &E : Mod->Exports) {
// FIXME: This may fail; we don't require that all exported modules
// are local or imported.
Record.push_back(getSubmoduleID(E.getPointer()));
Record.push_back(E.getInt());
}
Stream.EmitRecord(SUBMODULE_EXPORTS, Record);
}
// FIXME: How do we emit the 'use'd modules? They may not be submodules.
// Might be unnecessary as use declarations are only used to build the
// module itself.
// Emit the link libraries.
for (const auto &LL : Mod->LinkLibraries) {
RecordData::value_type Record[] = {SUBMODULE_LINK_LIBRARY,
LL.IsFramework};
Stream.EmitRecordWithBlob(LinkLibraryAbbrev, Record, LL.Library);
}
// Emit the conflicts.
for (const auto &C : Mod->Conflicts) {
// FIXME: This may fail; we don't require that all conflicting modules
// are local or imported.
RecordData::value_type Record[] = {SUBMODULE_CONFLICT,
getSubmoduleID(C.Other)};
Stream.EmitRecordWithBlob(ConflictAbbrev, Record, C.Message);
}
// Emit the configuration macros.
for (const auto &CM : Mod->ConfigMacros) {
RecordData::value_type Record[] = {SUBMODULE_CONFIG_MACRO};
Stream.EmitRecordWithBlob(ConfigMacroAbbrev, Record, CM);
}
// Emit the initializers, if any.
RecordData Inits;
for (Decl *D : Context->getModuleInitializers(Mod))
Inits.push_back(GetDeclRef(D));
if (!Inits.empty())
Stream.EmitRecord(SUBMODULE_INITIALIZERS, Inits);
// Queue up the submodules of this module.
for (auto *M : Mod->submodules())
Q.push(M);
}
Stream.ExitBlock();
assert((NextSubmoduleID - FirstSubmoduleID ==
getNumberOfModules(WritingModule)) &&
"Wrong # of submodules; found a reference to a non-local, "
"non-imported submodule?");
}
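// Note: submodules are emitted in BFS order starting from WritingModule, so a
// parent's SUBMODULE_DEFINITION (and therefore its submodule ID) is always
// written before any of its children's, which is what the ParentID assertion
// above relies on.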
void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
bool isModule) {
llvm::SmallDenseMap<const DiagnosticsEngine::DiagState *, unsigned, 64>
DiagStateIDMap;
unsigned CurrID = 0;
RecordData Record;
auto EncodeDiagStateFlags =
[](const DiagnosticsEngine::DiagState *DS) -> unsigned {
unsigned Result = (unsigned)DS->ExtBehavior;
for (unsigned Val :
{(unsigned)DS->IgnoreAllWarnings, (unsigned)DS->EnableAllWarnings,
(unsigned)DS->WarningsAsErrors, (unsigned)DS->ErrorsAsFatal,
(unsigned)DS->SuppressSystemWarnings})
Result = (Result << 1) | Val;
return Result;
};
unsigned Flags = EncodeDiagStateFlags(Diag.DiagStatesByLoc.FirstDiagState);
Record.push_back(Flags);
auto AddDiagState = [&](const DiagnosticsEngine::DiagState *State,
bool IncludeNonPragmaStates) {
// Ensure that the diagnostic state wasn't modified since it was created.
// We will not correctly round-trip this information otherwise.
assert(Flags == EncodeDiagStateFlags(State) &&
"diag state flags vary in single AST file");
unsigned &DiagStateID = DiagStateIDMap[State];
Record.push_back(DiagStateID);
if (DiagStateID == 0) {
DiagStateID = ++CurrID;
// Add a placeholder for the number of mappings.
auto SizeIdx = Record.size();
Record.emplace_back();
for (const auto &I : *State) {
if (I.second.isPragma() || IncludeNonPragmaStates) {
Record.push_back(I.first);
Record.push_back(I.second.serialize());
}
}
// Update the placeholder.
Record[SizeIdx] = (Record.size() - SizeIdx) / 2;
}
};
AddDiagState(Diag.DiagStatesByLoc.FirstDiagState, isModule);
// Reserve a spot for the number of locations with state transitions.
auto NumLocationsIdx = Record.size();
Record.emplace_back();
// Emit the state transitions.
unsigned NumLocations = 0;
for (auto &FileIDAndFile : Diag.DiagStatesByLoc.Files) {
if (!FileIDAndFile.first.isValid() ||
!FileIDAndFile.second.HasLocalTransitions)
continue;
++NumLocations;
AddSourceLocation(Diag.SourceMgr->getLocForStartOfFile(FileIDAndFile.first),
Record);
Record.push_back(FileIDAndFile.second.StateTransitions.size());
for (auto &StatePoint : FileIDAndFile.second.StateTransitions) {
Record.push_back(StatePoint.Offset);
AddDiagState(StatePoint.State, false);
}
}
// Backpatch the number of locations.
Record[NumLocationsIdx] = NumLocations;
// Emit CurDiagStateLoc. Do it last in order to match source order.
//
// This also protects against a hypothetical corner case with simulating
// -Werror settings for implicit modules in the ASTReader, where reading
// CurDiagState out of context could change whether warning pragmas are
// treated as errors.
AddSourceLocation(Diag.DiagStatesByLoc.CurDiagStateLoc, Record);
AddDiagState(Diag.DiagStatesByLoc.CurDiagState, false);
Stream.EmitRecord(DIAG_PRAGMA_MAPPINGS, Record);
}
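// A rough sketch of the DIAG_PRAGMA_MAPPINGS record assembled above, in
// emission order (reconstructed from this function, not an authoritative
// format spec):
//   [ Flags,
//     <first diag state>,
//     NumLocations,
//     { FileStartLoc, NumTransitions, { Offset, <state> }... }...,
//     CurDiagStateLoc, <current diag state> ]
// where <state> is a DiagStateID followed, only on its first occurrence, by a
// mapping count and (diag ID, serialized mapping) pairs.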
//===----------------------------------------------------------------------===//
// Type Serialization
//===----------------------------------------------------------------------===//
/// \brief Write the representation of a type to the AST stream.
void ASTWriter::WriteType(QualType T) {
TypeIdx &IdxRef = TypeIdxs[T];
if (IdxRef.getIndex() == 0) // we haven't seen this type before.
IdxRef = TypeIdx(NextTypeID++);
TypeIdx Idx = IdxRef;
assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST");
RecordData Record;
// Emit the type's representation.
ASTTypeWriter W(*this, Record);
W.Visit(T);
uint64_t Offset = W.Emit();
// Record the offset for this type.
unsigned Index = Idx.getIndex() - FirstTypeID;
if (TypeOffsets.size() == Index)
TypeOffsets.push_back(Offset);
else if (TypeOffsets.size() < Index) {
TypeOffsets.resize(Index + 1);
TypeOffsets[Index] = Offset;
} else {
llvm_unreachable("Types emitted in wrong order");
}
}
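// The bookkeeping above maintains the invariant that the type with index N
// (relative to FirstTypeID) records its bit offset in TypeOffsets[N];
// emitting a type whose slot already exists trips the unreachable as an
// out-of-order (or duplicate) emission.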
//===----------------------------------------------------------------------===//
// Declaration Serialization
//===----------------------------------------------------------------------===//
/// \brief Write the block containing all of the declaration IDs
/// lexically declared within the given DeclContext.
///
/// \returns the offset of the DECL_CONTEXT_LEXICAL block within the
/// bitstream, or 0 if no block was written.
uint64_t ASTWriter::WriteDeclContextLexicalBlock(ASTContext &Context,
DeclContext *DC) {
if (DC->decls_empty())
return 0;
uint64_t Offset = Stream.GetCurrentBitNo();
SmallVector<uint32_t, 128> KindDeclPairs;
for (const auto *D : DC->decls()) {
KindDeclPairs.push_back(D->getKind());
KindDeclPairs.push_back(GetDeclRef(D));
}
++NumLexicalDeclContexts;
RecordData::value_type Record[] = {DECL_CONTEXT_LEXICAL};
Stream.EmitRecordWithBlob(DeclContextLexicalAbbrev, Record,
bytes(KindDeclPairs));
return Offset;
}
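// The DECL_CONTEXT_LEXICAL blob is just the raw bytes of the interleaved
// (kind, decl ID) pairs. For illustration, a context holding one function and
// one variable serializes as four uint32_t values:
//   [ Decl::Function, <function's decl ID>, Decl::Var, <variable's decl ID> ]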
void ASTWriter::WriteTypeDeclOffsets() {
using namespace llvm;
// Write the type offsets array
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block
unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(),
FirstTypeID - NUM_PREDEF_TYPE_IDS};
Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, bytes(TypeOffsets));
}
// Write the declaration offsets array
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(DECL_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of declarations
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base decl ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // declarations block
unsigned DeclOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
{
RecordData::value_type Record[] = {DECL_OFFSET, DeclOffsets.size(),
FirstDeclID - NUM_PREDEF_DECL_IDS};
Stream.EmitRecordWithBlob(DeclOffsetAbbrev, Record, bytes(DeclOffsets));
}
}
void ASTWriter::WriteFileDeclIDsMap() {
using namespace llvm;
SmallVector<std::pair<FileID, DeclIDInFileInfo *>, 64> SortedFileDeclIDs(
FileDeclIDs.begin(), FileDeclIDs.end());
std::sort(SortedFileDeclIDs.begin(), SortedFileDeclIDs.end(),
llvm::less_first());
// Join the vectors of DeclIDs from all files.
SmallVector<DeclID, 256> FileGroupedDeclIDs;
for (auto &FileDeclEntry : SortedFileDeclIDs) {
DeclIDInFileInfo &Info = *FileDeclEntry.second;
Info.FirstDeclIndex = FileGroupedDeclIDs.size();
for (auto &LocDeclEntry : Info.DeclIDs)
FileGroupedDeclIDs.push_back(LocDeclEntry.second);
}
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(FILE_SORTED_DECLS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {FILE_SORTED_DECLS,
FileGroupedDeclIDs.size()};
Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
}
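// FileGroupedDeclIDs is one flat array covering every file; each file's
// DeclIDInFileInfo keeps the start of its own run in FirstDeclIndex, so the
// per-file slices can be recovered from the single FILE_SORTED_DECLS blob.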
void ASTWriter::WriteComments() {
Stream.EnterSubblock(COMMENTS_BLOCK_ID, 3);
ArrayRef<RawComment *> RawComments = Context->Comments.getComments();
RecordData Record;
for (const auto *I : RawComments) {
Record.clear();
AddSourceRange(I->getSourceRange(), Record);
Record.push_back(I->getKind());
Record.push_back(I->isTrailingComment());
Record.push_back(I->isAlmostTrailingComment());
Stream.EmitRecord(COMMENTS_RAW_COMMENT, Record);
}
Stream.ExitBlock();
}
//===----------------------------------------------------------------------===//
// Global Method Pool and Selector Serialization
//===----------------------------------------------------------------------===//
namespace {
// Trait used for the on-disk hash table used in the method pool.
class ASTMethodPoolTrait {
ASTWriter &Writer;
public:
typedef Selector key_type;
typedef key_type key_type_ref;
struct data_type {
SelectorID ID;
ObjCMethodList Instance, Factory;
};
typedef const data_type& data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
explicit ASTMethodPoolTrait(ASTWriter &Writer) : Writer(Writer) { }
static hash_value_type ComputeHash(Selector Sel) {
return serialization::ComputeHash(Sel);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, Selector Sel,
data_type_ref Methods) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4);
LE.write<uint16_t>(KeyLen);
unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
if (Method->getMethod())
DataLen += 4;
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
if (Method->getMethod())
DataLen += 4;
LE.write<uint16_t>(DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, Selector Sel, unsigned) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell();
assert((Start >> 32) == 0 && "Selector key offset too large");
Writer.SetSelectorOffset(Sel, Start);
unsigned N = Sel.getNumArgs();
LE.write<uint16_t>(N);
if (N == 0)
N = 1;
for (unsigned I = 0; I != N; ++I)
LE.write<uint32_t>(
Writer.getIdentifierRef(Sel.getIdentifierInfoForSlot(I)));
}
void EmitData(raw_ostream& Out, key_type_ref,
data_type_ref Methods, unsigned DataLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell(); (void)Start;
LE.write<uint32_t>(Methods.ID);
unsigned NumInstanceMethods = 0;
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
if (Method->getMethod())
++NumInstanceMethods;
unsigned NumFactoryMethods = 0;
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
if (Method->getMethod())
++NumFactoryMethods;
unsigned InstanceBits = Methods.Instance.getBits();
assert(InstanceBits < 4);
unsigned InstanceHasMoreThanOneDeclBit =
Methods.Instance.hasMoreThanOneDecl();
unsigned FullInstanceBits = (NumInstanceMethods << 3) |
(InstanceHasMoreThanOneDeclBit << 2) |
InstanceBits;
unsigned FactoryBits = Methods.Factory.getBits();
assert(FactoryBits < 4);
unsigned FactoryHasMoreThanOneDeclBit =
Methods.Factory.hasMoreThanOneDecl();
unsigned FullFactoryBits = (NumFactoryMethods << 3) |
(FactoryHasMoreThanOneDeclBit << 2) |
FactoryBits;
LE.write<uint16_t>(FullInstanceBits);
LE.write<uint16_t>(FullFactoryBits);
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
if (Method->getMethod())
LE.write<uint32_t>(Writer.getDeclID(Method->getMethod()));
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
if (Method->getMethod())
LE.write<uint32_t>(Writer.getDeclID(Method->getMethod()));
assert(Out.tell() - Start == DataLen && "Data length is wrong");
}
};
} // end anonymous namespace
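// For illustration, the packing in EmitData above: with, say, 5 visible
// instance methods, hasMoreThanOneDecl() == true, and getBits() == 2, the
// emitted FullInstanceBits value is
//   (5 << 3) | (1 << 2) | 2 == 40 | 4 | 2 == 46
// and the reader can recover the count (46 >> 3 == 5), the
// more-than-one-decl flag ((46 >> 2) & 1 == 1), and the raw list bits
// (46 & 3 == 2) from the single uint16_t.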
/// \brief Write ObjC data: selectors and the method pool.
///
/// The method pool contains both instance and factory methods, stored
/// in an on-disk hash table indexed by the selector. The hash table also
/// contains an empty entry for every other selector known to Sema.
void ASTWriter::WriteSelectors(Sema &SemaRef) {
using namespace llvm;
// Do we have to do anything at all?
if (SemaRef.MethodPool.empty() && SelectorIDs.empty())
return;
unsigned NumTableEntries = 0;
// Create and write out the blob that contains selectors and the method pool.
{
llvm::OnDiskChainedHashTableGenerator<ASTMethodPoolTrait> Generator;
ASTMethodPoolTrait Trait(*this);
// Create the on-disk hash table representation. We walk through every
// selector we've seen and look it up in the method pool.
SelectorOffsets.resize(NextSelectorID - FirstSelectorID);
for (auto &SelectorAndID : SelectorIDs) {
Selector S = SelectorAndID.first;
SelectorID ID = SelectorAndID.second;
Sema::GlobalMethodPool::iterator F = SemaRef.MethodPool.find(S);
ASTMethodPoolTrait::data_type Data = {
ID,
ObjCMethodList(),
ObjCMethodList()
};
if (F != SemaRef.MethodPool.end()) {
Data.Instance = F->second.first;
Data.Factory = F->second.second;
}
// Only write this selector if it's not in an existing AST or something
// changed.
if (Chain && ID < FirstSelectorID) {
// Selector already exists. Did it change?
bool changed = false;
for (ObjCMethodList *M = &Data.Instance;
!changed && M && M->getMethod(); M = M->getNext()) {
if (!M->getMethod()->isFromASTFile())
changed = true;
}
for (ObjCMethodList *M = &Data.Factory; !changed && M && M->getMethod();
M = M->getNext()) {
if (!M->getMethod()->isFromASTFile())
changed = true;
}
if (!changed)
continue;
} else if (Data.Instance.getMethod() || Data.Factory.getMethod()) {
// A new method pool entry.
++NumTableEntries;
}
Generator.insert(S, Data, Trait);
}
// Create the on-disk hash table in a buffer.
SmallString<4096> MethodPool;
uint32_t BucketOffset;
{
using namespace llvm::support;
ASTMethodPoolTrait Trait(*this);
llvm::raw_svector_ostream Out(MethodPool);
// Make sure that no bucket is at offset 0
endian::Writer<little>(Out).write<uint32_t>(0);
BucketOffset = Generator.Emit(Out, Trait);
}
// Create a blob abbreviation
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(METHOD_POOL));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned MethodPoolAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the method pool
{
RecordData::value_type Record[] = {METHOD_POOL, BucketOffset,
NumTableEntries};
Stream.EmitRecordWithBlob(MethodPoolAbbrev, Record, MethodPool);
}
// Create a blob abbreviation for the selector table offsets.
Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(SELECTOR_OFFSETS));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned SelectorOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the selector offsets table.
{
RecordData::value_type Record[] = {
SELECTOR_OFFSETS, SelectorOffsets.size(),
FirstSelectorID - NUM_PREDEF_SELECTOR_IDS};
Stream.EmitRecordWithBlob(SelectorOffsetAbbrev, Record,
bytes(SelectorOffsets));
}
}
}
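// Net layout: the METHOD_POOL blob starts with a dummy word (so that no
// bucket lands at offset 0) followed by the on-disk hash table, and
// SELECTOR_OFFSETS maps each locally assigned SelectorID to the key offset
// that ASTMethodPoolTrait::EmitKey recorded via SetSelectorOffset.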
/// \brief Write the selectors referenced in @selector expression into AST file.
void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
using namespace llvm;
if (SemaRef.ReferencedSelectors.empty())
return;
RecordData Record;
ASTRecordWriter Writer(*this, Record);
// Note: this writes out all references even for a dependent AST. But it is
// very tricky to fix, and given that @selector shouldn't really appear in
// headers, probably not worth it. It's not a correctness issue.
for (auto &SelectorAndLocation : SemaRef.ReferencedSelectors) {
Selector Sel = SelectorAndLocation.first;
SourceLocation Loc = SelectorAndLocation.second;
Writer.AddSelectorRef(Sel);
Writer.AddSourceLocation(Loc);
}
Writer.Emit(REFERENCED_SELECTOR_POOL);
}
//===----------------------------------------------------------------------===//
// Identifier Table Serialization
//===----------------------------------------------------------------------===//
/// Determine the declaration that should be put into the name lookup table to
/// represent the given declaration in this module. This is usually D itself,
/// but if D was imported and merged into a local declaration, we want the most
/// recent local declaration instead. The chosen declaration will be the most
/// recent declaration in any module that imports this one.
static NamedDecl *getDeclForLocalLookup(const LangOptions &LangOpts,
NamedDecl *D) {
if (!LangOpts.Modules || !D->isFromASTFile())
return D;
if (Decl *Redecl = D->getPreviousDecl()) {
// For Redeclarable decls, a prior declaration might be local.
for (; Redecl; Redecl = Redecl->getPreviousDecl()) {
// If we find a local decl, we're done.
if (!Redecl->isFromASTFile()) {
// Exception: in very rare cases (for injected-class-names), not all
// redeclarations are in the same semantic context. Skip ones in a
// different context. They don't go in this lookup table at all.
if (!Redecl->getDeclContext()->getRedeclContext()->Equals(
D->getDeclContext()->getRedeclContext()))
continue;
return cast<NamedDecl>(Redecl);
}
// If we find a decl from a (chained-)PCH stop since we won't find a
// local one.
if (Redecl->getOwningModuleID() == 0)
break;
}
} else if (Decl *First = D->getCanonicalDecl()) {
// For Mergeable decls, the first decl might be local.
if (!First->isFromASTFile())
return cast<NamedDecl>(First);
}
// All declarations are imported. Our most recent declaration will also be
// the most recent one in anyone who imports us.
return D;
}
namespace {
class ASTIdentifierTableTrait {
ASTWriter &Writer;
Preprocessor &PP;
IdentifierResolver &IdResolver;
bool IsModule;
bool NeedDecls;
ASTWriter::RecordData *InterestingIdentifierOffsets;
/// \brief Determines whether this is an "interesting" identifier that needs a
/// full IdentifierInfo structure written into the hash table. Notably, this
/// doesn't check whether the name has macros defined; use PublicMacroIterator
/// to check that.
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
if (MacroOffset ||
II->isPoisoned() ||
(IsModule ? II->hasRevertedBuiltin() : II->getObjCOrBuiltinID()) ||
II->hasRevertedTokenIDToIdentifier() ||
(NeedDecls && II->getFETokenInfo<void>()))
return true;
return false;
}
public:
typedef IdentifierInfo* key_type;
typedef key_type key_type_ref;
typedef IdentID data_type;
typedef data_type data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
IdentifierResolver &IdResolver, bool IsModule,
ASTWriter::RecordData *InterestingIdentifierOffsets)
: Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule),
NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
bool needDecls() const { return NeedDecls; }
static hash_value_type ComputeHash(const IdentifierInfo* II) {
return llvm::HashString(II->getName());
}
bool isInterestingIdentifier(const IdentifierInfo *II) {
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
return isInterestingIdentifier(II, MacroOffset);
}
bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
return isInterestingIdentifier(II, 0);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
unsigned KeyLen = II->getLength() + 1;
unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
if (isInterestingIdentifier(II, MacroOffset)) {
DataLen += 2; // 2 bytes for builtin ID
DataLen += 2; // 2 bytes for flags
if (MacroOffset)
DataLen += 4; // MacroDirectives offset.
if (NeedDecls) {
for (IdentifierResolver::iterator D = IdResolver.begin(II),
DEnd = IdResolver.end();
D != DEnd; ++D)
DataLen += 4;
}
}
using namespace llvm::support;
endian::Writer<little> LE(Out);
assert((uint16_t)DataLen == DataLen && (uint16_t)KeyLen == KeyLen);
LE.write<uint16_t>(DataLen);
// We emit the key length after the data length so that every
// string is preceded by a 16-bit length. This matches the PTH
// format for storing identifiers.
LE.write<uint16_t>(KeyLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, const IdentifierInfo* II,
unsigned KeyLen) {
// Record the location of the key data. This is used when generating
// the mapping from persistent IDs to strings.
Writer.SetIdentifierOffset(II, Out.tell());
// Emit the offset of the key/data length information to the interesting
// identifiers table if necessary.
if (InterestingIdentifierOffsets && isInterestingIdentifier(II))
InterestingIdentifierOffsets->push_back(Out.tell() - 4);
Out.write(II->getNameStart(), KeyLen);
}
void EmitData(raw_ostream& Out, IdentifierInfo* II,
IdentID ID, unsigned) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
auto MacroOffset = Writer.getMacroDirectivesOffset(II);
if (!isInterestingIdentifier(II, MacroOffset)) {
LE.write<uint32_t>(ID << 1);
return;
}
LE.write<uint32_t>((ID << 1) | 0x01);
uint32_t Bits = (uint32_t)II->getObjCOrBuiltinID();
assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader.");
LE.write<uint16_t>(Bits);
Bits = 0;
bool HadMacroDefinition = MacroOffset != 0;
Bits = (Bits << 1) | unsigned(HadMacroDefinition);
Bits = (Bits << 1) | unsigned(II->isExtensionToken());
Bits = (Bits << 1) | unsigned(II->isPoisoned());
Bits = (Bits << 1) | unsigned(II->hasRevertedBuiltin());
Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
LE.write<uint16_t>(Bits);
if (HadMacroDefinition)
LE.write<uint32_t>(MacroOffset);
if (NeedDecls) {
// Emit the declaration IDs in reverse order, because the
// IdentifierResolver provides the declarations as they would be
// visible (e.g., the function "stat" would come before the struct
// "stat"), but the ASTReader adds declarations to the end of the list
// (so we need to see the struct "stat" before the function "stat").
// Only emit declarations that aren't from a chained PCH, though.
SmallVector<NamedDecl *, 16> Decls(IdResolver.begin(II),
IdResolver.end());
for (SmallVectorImpl<NamedDecl *>::reverse_iterator D = Decls.rbegin(),
DEnd = Decls.rend();
D != DEnd; ++D)
LE.write<uint32_t>(
Writer.getDeclID(getDeclForLocalLookup(PP.getLangOpts(), *D)));
}
}
};
} // end anonymous namespace
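// For illustration, the flag word written in EmitData above packs six bits,
// most significant first: HadMacroDefinition, isExtensionToken, isPoisoned,
// hasRevertedBuiltin, hasRevertedTokenIDToIdentifier, and
// isCPlusPlusOperatorKeyword. A poisoned identifier that also has a macro
// definition therefore serializes its flags as 0b101000 == 40.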
/// \brief Write the identifier table into the AST file.
///
/// The identifier table consists of a blob containing string data
/// (the actual identifiers themselves) and a separate "offsets" index
/// that maps identifier IDs to locations within the blob.
void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
IdentifierResolver &IdResolver,
bool IsModule) {
using namespace llvm;
RecordData InterestingIdents;
// Create and write out the blob that contains the identifier
// strings.
{
llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
ASTIdentifierTableTrait Trait(
*this, PP, IdResolver, IsModule,
(getLangOpts().CPlusPlus && IsModule) ? &InterestingIdents : nullptr);
// Look for any identifiers that were named while processing the
// headers, but are otherwise not needed. We add these to the hash
// table to enable checking of the predefines buffer in the case
// where the user adds new macro definitions when building the AST
// file.
SmallVector<const IdentifierInfo *, 128> IIs;
for (const auto &ID : PP.getIdentifierTable())
IIs.push_back(ID.second);
// Sort the identifiers lexicographically before getting their references so
// that their order is stable.
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
for (const IdentifierInfo *II : IIs)
if (Trait.isInterestingNonMacroIdentifier(II))
getIdentifierRef(II);
// Create the on-disk hash table representation. We only store offsets
// for identifiers that appear here for the first time.
IdentifierOffsets.resize(NextIdentID - FirstIdentID);
for (auto IdentIDPair : IdentifierIDs) {
auto *II = const_cast<IdentifierInfo *>(IdentIDPair.first);
IdentID ID = IdentIDPair.second;
assert(II && "NULL identifier in identifier table");
// Write out identifiers if either the ID is local or the identifier has
// changed since it was loaded.
if (ID >= FirstIdentID || !Chain || !II->isFromAST()
|| II->hasChangedSinceDeserialization() ||
(Trait.needDecls() &&
II->hasFETokenInfoChangedSinceDeserialization()))
Generator.insert(II, ID, Trait);
}
// Create the on-disk hash table in a buffer.
SmallString<4096> IdentifierTable;
uint32_t BucketOffset;
{
using namespace llvm::support;
llvm::raw_svector_ostream Out(IdentifierTable);
// Make sure that no bucket is at offset 0
endian::Writer<little>(Out).write<uint32_t>(0);
BucketOffset = Generator.Emit(Out, Trait);
}
// Create a blob abbreviation
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_TABLE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
// Write the identifier table
RecordData::value_type Record[] = {IDENTIFIER_TABLE, BucketOffset};
Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
}
// Write the offsets table for identifier IDs.
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
#ifndef NDEBUG
for (unsigned I = 0, N = IdentifierOffsets.size(); I != N; ++I)
assert(IdentifierOffsets[I] && "Missing identifier offset?");
#endif
RecordData::value_type Record[] = {IDENTIFIER_OFFSET,
IdentifierOffsets.size(),
FirstIdentID - NUM_PREDEF_IDENT_IDS};
Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
bytes(IdentifierOffsets));
// In C++, write the list of interesting identifiers (those that are
// defined as macros, poisoned, or similar unusual things).
if (!InterestingIdents.empty())
Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
}
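// The resulting layout, roughly: an IDENTIFIER_TABLE record whose blob holds
// the chained hash table (with BucketOffset locating the bucket array), an
// IDENTIFIER_OFFSET record mapping each local IdentID to a byte offset in
// that blob, and, for C++ modules, an optional INTERESTING_IDENTIFIERS list.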
//===----------------------------------------------------------------------===//
// DeclContext's Name Lookup Table Serialization
//===----------------------------------------------------------------------===//
namespace {
// Trait used for the on-disk hash table used in the method pool.
class ASTDeclContextNameLookupTrait {
ASTWriter &Writer;
llvm::SmallVector<DeclID, 64> DeclIDs;
public:
typedef DeclarationNameKey key_type;
typedef key_type key_type_ref;
/// A start and end index into DeclIDs, representing a sequence of decls.
typedef std::pair<unsigned, unsigned> data_type;
typedef const data_type& data_type_ref;
typedef unsigned hash_value_type;
typedef unsigned offset_type;
explicit ASTDeclContextNameLookupTrait(ASTWriter &Writer) : Writer(Writer) { }
template<typename Coll>
data_type getData(const Coll &Decls) {
unsigned Start = DeclIDs.size();
for (NamedDecl *D : Decls) {
DeclIDs.push_back(
Writer.GetDeclRef(getDeclForLocalLookup(Writer.getLangOpts(), D)));
}
return std::make_pair(Start, DeclIDs.size());
}
data_type ImportData(const reader::ASTDeclContextNameLookupTrait::data_type &FromReader) {
unsigned Start = DeclIDs.size();
for (auto ID : FromReader)
DeclIDs.push_back(ID);
return std::make_pair(Start, DeclIDs.size());
}
static bool EqualKey(key_type_ref a, key_type_ref b) {
return a == b;
}
hash_value_type ComputeHash(DeclarationNameKey Name) {
return Name.getHash();
}
void EmitFileRef(raw_ostream &Out, ModuleFile *F) const {
assert(Writer.hasChain() &&
"have reference to loaded module file but no chain?");
using namespace llvm::support;
endian::Writer<little>(Out)
.write<uint32_t>(Writer.getChain()->getModuleFileID(F));
}
std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream &Out,
DeclarationNameKey Name,
data_type_ref Lookup) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
unsigned KeyLen = 1;
switch (Name.getKind()) {
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
KeyLen += 4;
break;
case DeclarationName::CXXOperatorName:
KeyLen += 1;
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
break;
}
LE.write<uint16_t>(KeyLen);
// 4 bytes for each DeclID.
unsigned DataLen = 4 * (Lookup.second - Lookup.first);
assert(uint16_t(DataLen) == DataLen &&
"too many decls for serialized lookup result");
LE.write<uint16_t>(DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream &Out, DeclarationNameKey Name, unsigned) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
LE.write<uint8_t>(Name.getKind());
switch (Name.getKind()) {
case DeclarationName::Identifier:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
LE.write<uint32_t>(Writer.getIdentifierRef(Name.getIdentifier()));
return;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
LE.write<uint32_t>(Writer.getSelectorRef(Name.getSelector()));
return;
case DeclarationName::CXXOperatorName:
assert(Name.getOperatorKind() < NUM_OVERLOADED_OPERATORS &&
"Invalid operator?");
LE.write<uint8_t>(Name.getOperatorKind());
return;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
return;
}
llvm_unreachable("Invalid name kind?");
}
void EmitData(raw_ostream &Out, key_type_ref, data_type Lookup,
unsigned DataLen) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
uint64_t Start = Out.tell(); (void)Start;
for (unsigned I = Lookup.first, N = Lookup.second; I != N; ++I)
LE.write<uint32_t>(DeclIDs[I]);
assert(Out.tell() - Start == DataLen && "Data length is wrong");
}
};
} // end anonymous namespace
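// Key encoding recap from EmitKey above: one byte of name kind, then a 4-byte
// identifier or selector reference for identifier-like and Objective-C names,
// a single byte of operator kind for C++ operators, and nothing extra for
// constructor, destructor, conversion-function, and using-directive names,
// matching the lengths computed in EmitKeyDataLength.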
bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result,
DeclContext *DC) {
return Result.hasExternalDecls() && DC->NeedToReconcileExternalVisibleStorage;
}
bool ASTWriter::isLookupResultEntirelyExternal(StoredDeclsList &Result,
DeclContext *DC) {
for (auto *D : Result.getLookupResult())
if (!getDeclForLocalLookup(getLangOpts(), D)->isFromASTFile())
return false;
return true;
}
void
ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC,
llvm::SmallVectorImpl<char> &LookupTable) {
assert(!ConstDC->HasLazyLocalLexicalLookups &&
!ConstDC->HasLazyExternalLexicalLookups &&
"must call buildLookups first");
// FIXME: We need to build the lookups table, which is logically const.
auto *DC = const_cast<DeclContext*>(ConstDC);
assert(DC == DC->getPrimaryContext() && "only primary DC has lookup table");
// Create the on-disk hash table representation.
MultiOnDiskHashTableGenerator<reader::ASTDeclContextNameLookupTrait,
ASTDeclContextNameLookupTrait> Generator;
ASTDeclContextNameLookupTrait Trait(*this);
// The first step is to collect the declaration names which we need to
// serialize into the name lookup table, and to collect them in a stable
// order.
SmallVector<DeclarationName, 16> Names;
// We also build up small sets of the constructor and conversion function
// names which are visible.
llvm::SmallSet<DeclarationName, 8> ConstructorNameSet, ConversionNameSet;
for (auto &Lookup : *DC->buildLookup()) {
auto &Name = Lookup.first;
auto &Result = Lookup.second;
// If there are no local declarations in our lookup result, we
// don't need to write an entry for the name at all. If we can't
// write out a lookup set without performing more deserialization,
// just skip this entry.
if (isLookupResultExternal(Result, DC) &&
isLookupResultEntirelyExternal(Result, DC))
continue;
// We also skip empty results. If any of the results could be external and
// the currently available results are empty, then all of the results are
// external and we skip the entry above. So the only way we get here with
// empty results is when none of the results could have been external *and*
// the lookup genuinely produced nothing.
//
// FIXME: While we might want to start emitting on-disk entries for negative
// lookups into a decl context as an optimization, today we *have* to skip
// them because there are names with empty lookup results in decl contexts
// which we can't emit in any stable ordering: we lookup constructors and
// conversion functions in the enclosing namespace scope creating empty
// results for them. This is almost certainly a bug in Clang's name lookup,
// but that is likely to be hard or impossible to fix and so we tolerate it
// here by omitting lookups with empty results.
if (Lookup.second.getLookupResult().empty())
continue;
switch (Lookup.first.getNameKind()) {
default:
Names.push_back(Lookup.first);
break;
case DeclarationName::CXXConstructorName:
assert(isa<CXXRecordDecl>(DC) &&
"Cannot have a constructor name outside of a class!");
ConstructorNameSet.insert(Name);
break;
case DeclarationName::CXXConversionFunctionName:
assert(isa<CXXRecordDecl>(DC) &&
"Cannot have a conversion function name outside of a class!");
ConversionNameSet.insert(Name);
break;
}
}
// Sort the names into a stable order.
std::sort(Names.begin(), Names.end());
if (auto *D = dyn_cast<CXXRecordDecl>(DC)) {
// We need to establish an ordering of constructor and conversion function
// names, and they don't have an intrinsic ordering.
// First we try the easy case by forming the current context's constructor
// name and adding that name first. This is a very useful optimization to
// avoid walking the lexical declarations in many cases, and it also
// handles the only case where a constructor name can come from some other
// lexical context -- when that name is an implicit constructor merged from
// another declaration in the redecl chain. Any non-implicit constructor or
// conversion function which doesn't occur in all the lexical contexts
// would be an ODR violation.
auto ImplicitCtorName = Context->DeclarationNames.getCXXConstructorName(
Context->getCanonicalType(Context->getRecordType(D)));
if (ConstructorNameSet.erase(ImplicitCtorName))
Names.push_back(ImplicitCtorName);
// If we still have constructors or conversion functions, we walk all the
// names in the decl and add the constructors and conversion functions
// which are visible in the order they lexically occur within the context.
if (!ConstructorNameSet.empty() || !ConversionNameSet.empty())
for (Decl *ChildD : cast<CXXRecordDecl>(DC)->decls())
if (auto *ChildND = dyn_cast<NamedDecl>(ChildD)) {
auto Name = ChildND->getDeclName();
switch (Name.getNameKind()) {
default:
continue;
case DeclarationName::CXXConstructorName:
if (ConstructorNameSet.erase(Name))
Names.push_back(Name);
break;
case DeclarationName::CXXConversionFunctionName:
if (ConversionNameSet.erase(Name))
Names.push_back(Name);
break;
}
if (ConstructorNameSet.empty() && ConversionNameSet.empty())
break;
}
assert(ConstructorNameSet.empty() && "Failed to find all of the visible "
"constructors by walking all the "
"lexical members of the context.");
assert(ConversionNameSet.empty() && "Failed to find all of the visible "
"conversion functions by walking all "
"the lexical members of the context.");
}
// Next we need to do a lookup with each name into this decl context to fully
// populate any results from external sources. We don't actually use the
// results of these lookups because we only want to use the results after all
// results have been loaded and the pointers into them will be stable.
for (auto &Name : Names)
DC->lookup(Name);
// Now we need to insert the results for each name into the hash table. For
// constructor names and conversion function names, we actually need to merge
// all of the results for them into one list of results each and insert
// those.
SmallVector<NamedDecl *, 8> ConstructorDecls;
SmallVector<NamedDecl *, 8> ConversionDecls;
// Now loop over the names, either inserting them or appending for the two
// special cases.
for (auto &Name : Names) {
DeclContext::lookup_result Result = DC->noload_lookup(Name);
switch (Name.getNameKind()) {
default:
Generator.insert(Name, Trait.getData(Result), Trait);
break;
case DeclarationName::CXXConstructorName:
ConstructorDecls.append(Result.begin(), Result.end());
break;
case DeclarationName::CXXConversionFunctionName:
ConversionDecls.append(Result.begin(), Result.end());
break;
}
}
// Handle our two special cases if we ended up having any. We arbitrarily use
// the first declaration's name here because the name itself isn't part of
// the key, only the kind of name is used.
if (!ConstructorDecls.empty())
Generator.insert(ConstructorDecls.front()->getDeclName(),
Trait.getData(ConstructorDecls), Trait);
if (!ConversionDecls.empty())
Generator.insert(ConversionDecls.front()->getDeclName(),
Trait.getData(ConversionDecls), Trait);
// Create the on-disk hash table. Also emit the existing imported and
// merged table if there is one.
auto *Lookups = Chain ? Chain->getLoadedLookupTables(DC) : nullptr;
Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr);
}
/// \brief Write the block containing all of the declaration IDs
/// visible from the given DeclContext.
///
/// \returns the offset of the DECL_CONTEXT_VISIBLE block within the
/// bitstream, or 0 if no block was written.
uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
DeclContext *DC) {
// If we imported a key declaration of this namespace, write the visible
// lookup results as an update record for it rather than including them
// on this declaration. We will only look at key declarations on reload.
if (isa<NamespaceDecl>(DC) && Chain &&
Chain->getKeyDeclaration(cast<Decl>(DC))->isFromASTFile()) {
// Only do this once, for the first local declaration of the namespace.
for (auto *Prev = cast<NamespaceDecl>(DC)->getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl())
if (!Prev->isFromASTFile())
return 0;
// Note that we need to emit an update record for the primary context.
UpdatedDeclContexts.insert(DC->getPrimaryContext());
// Make sure all visible decls are written. They will be recorded later. We
// do this using a side data structure so we can sort the names into
// a deterministic order.
StoredDeclsMap *Map = DC->getPrimaryContext()->buildLookup();
SmallVector<std::pair<DeclarationName, DeclContext::lookup_result>, 16>
LookupResults;
if (Map) {
LookupResults.reserve(Map->size());
for (auto &Entry : *Map)
LookupResults.push_back(
std::make_pair(Entry.first, Entry.second.getLookupResult()));
}
std::sort(LookupResults.begin(), LookupResults.end(), llvm::less_first());
for (auto &NameAndResult : LookupResults) {
DeclarationName Name = NameAndResult.first;
DeclContext::lookup_result Result = NameAndResult.second;
if (Name.getNameKind() == DeclarationName::CXXConstructorName ||
Name.getNameKind() == DeclarationName::CXXConversionFunctionName) {
// We have to work around a name lookup bug here where negative lookup
// results for these names get cached in namespace lookup tables (these
// names should never be looked up in a namespace).
assert(Result.empty() && "Cannot have a constructor or conversion "
"function name in a namespace!");
continue;
}
for (NamedDecl *ND : Result)
if (!ND->isFromASTFile())
GetDeclRef(ND);
}
return 0;
}
if (DC->getPrimaryContext() != DC)
return 0;
// Skip contexts which don't support name lookup.
if (!DC->isLookupContext())
return 0;
// If not in C++, we perform name lookup for the translation unit via the
// IdentifierInfo chains, so don't bother to build a visible-declarations
// table.
if (DC->isTranslationUnit() && !Context.getLangOpts().CPlusPlus)
return 0;
// Serialize the contents of the mapping used for lookup. Note that,
// although we have two very different code paths, the serialized
// representation is the same for both cases: a declaration name,
// followed by a size, followed by references to the visible
// declarations that have that name.
uint64_t Offset = Stream.GetCurrentBitNo();
StoredDeclsMap *Map = DC->buildLookup();
if (!Map || Map->empty())
return 0;
// Create the on-disk hash table in a buffer.
SmallString<4096> LookupTable;
GenerateNameLookupTable(DC, LookupTable);
// Write the lookup table
RecordData::value_type Record[] = {DECL_CONTEXT_VISIBLE};
Stream.EmitRecordWithBlob(DeclContextVisibleLookupAbbrev, Record,
LookupTable);
++NumVisibleDeclContexts;
return Offset;
}
/// \brief Write an UPDATE_VISIBLE block for the given context.
///
/// UPDATE_VISIBLE blocks contain the declarations that are added to an existing
/// DeclContext in a dependent AST file. As such, they only exist for the TU
/// (in C++), for namespaces, and for classes with forward-declared unscoped
/// enumeration members (in C++11).
void ASTWriter::WriteDeclContextVisibleUpdate(const DeclContext *DC) {
StoredDeclsMap *Map = DC->getLookupPtr();
if (!Map || Map->empty())
return;
// Create the on-disk hash table in a buffer.
SmallString<4096> LookupTable;
GenerateNameLookupTable(DC, LookupTable);
// If we're updating a namespace, select a key declaration as the key for the
// update record; those are the only ones that will be checked on reload.
if (isa<NamespaceDecl>(DC))
DC = cast<DeclContext>(Chain->getKeyDeclaration(cast<Decl>(DC)));
// Write the lookup table
RecordData::value_type Record[] = {UPDATE_VISIBLE, getDeclID(cast<Decl>(DC))};
Stream.EmitRecordWithBlob(UpdateVisibleAbbrev, Record, LookupTable);
}
/// \brief Write an FP_PRAGMA_OPTIONS block for the given FPOptions.
void ASTWriter::WriteFPPragmaOptions(const FPOptions &Opts) {
RecordData::value_type Record[] = {Opts.getInt()};
Stream.EmitRecord(FP_PRAGMA_OPTIONS, Record);
}
/// \brief Write an OPENCL_EXTENSIONS block for the given OpenCLOptions.
void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) {
if (!SemaRef.Context.getLangOpts().OpenCL)
return;
const OpenCLOptions &Opts = SemaRef.getOpenCLOptions();
RecordData Record;
for (const auto &I : Opts.OptMap) {
AddString(I.getKey(), Record);
auto V = I.getValue();
Record.push_back(V.Supported ? 1 : 0);
Record.push_back(V.Enabled ? 1 : 0);
Record.push_back(V.Avail);
Record.push_back(V.Core);
}
Stream.EmitRecord(OPENCL_EXTENSIONS, Record);
}
void ASTWriter::WriteOpenCLExtensionTypes(Sema &SemaRef) {
if (!SemaRef.Context.getLangOpts().OpenCL)
return;
RecordData Record;
for (const auto &I : SemaRef.OpenCLTypeExtMap) {
Record.push_back(
static_cast<unsigned>(getTypeID(I.first->getCanonicalTypeInternal())));
Record.push_back(I.second.size());
for (auto Ext : I.second)
AddString(Ext, Record);
}
Stream.EmitRecord(OPENCL_EXTENSION_TYPES, Record);
}
void ASTWriter::WriteOpenCLExtensionDecls(Sema &SemaRef) {
if (!SemaRef.Context.getLangOpts().OpenCL)
return;
RecordData Record;
for (const auto &I : SemaRef.OpenCLDeclExtMap) {
Record.push_back(getDeclID(I.first));
Record.push_back(static_cast<unsigned>(I.second.size()));
for (auto Ext : I.second)
AddString(Ext, Record);
}
Stream.EmitRecord(OPENCL_EXTENSION_DECLS, Record);
}
void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) {
if (SemaRef.ForceCUDAHostDeviceDepth > 0) {
RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth};
Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record);
}
}
void ASTWriter::WriteObjCCategories() {
SmallVector<ObjCCategoriesInfo, 2> CategoriesMap;
RecordData Categories;
for (unsigned I = 0, N = ObjCClassesWithCategories.size(); I != N; ++I) {
unsigned Size = 0;
unsigned StartIndex = Categories.size();
ObjCInterfaceDecl *Class = ObjCClassesWithCategories[I];
// Allocate space for the size.
Categories.push_back(0);
// Add the categories.
for (ObjCInterfaceDecl::known_categories_iterator
Cat = Class->known_categories_begin(),
CatEnd = Class->known_categories_end();
Cat != CatEnd; ++Cat, ++Size) {
assert(getDeclID(*Cat) != 0 && "Bogus category");
AddDeclRef(*Cat, Categories);
}
// Update the size.
Categories[StartIndex] = Size;
// Record this interface -> category map.
ObjCCategoriesInfo CatInfo = { getDeclID(Class), StartIndex };
CategoriesMap.push_back(CatInfo);
}
// Sort the categories map by the definition ID, since the reader will be
// performing binary searches on this information.
llvm::array_pod_sort(CategoriesMap.begin(), CategoriesMap.end());
// Emit the categories map.
using namespace llvm;
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(OBJC_CATEGORIES_MAP));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of entries
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned AbbrevID = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {OBJC_CATEGORIES_MAP, CategoriesMap.size()};
Stream.EmitRecordWithBlob(AbbrevID, Record,
reinterpret_cast<char *>(CategoriesMap.data()),
CategoriesMap.size() * sizeof(ObjCCategoriesInfo));
// Emit the category lists.
Stream.EmitRecord(OBJC_CATEGORIES, Categories);
}
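// For illustration (hypothetical decl IDs): if class A (ID 10) has two known
// categories and class B (ID 7) has one, Categories becomes
//   [ 2, <A's cat 1>, <A's cat 2>, 1, <B's cat> ]
// and the sorted OBJC_CATEGORIES_MAP blob holds {7, 3} then {10, 0}, so the
// reader can binary-search by class ID and index straight into Categories.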
void ASTWriter::WriteLateParsedTemplates(Sema &SemaRef) {
Sema::LateParsedTemplateMapT &LPTMap = SemaRef.LateParsedTemplateMap;
if (LPTMap.empty())
return;
RecordData Record;
for (auto &LPTMapEntry : LPTMap) {
const FunctionDecl *FD = LPTMapEntry.first;
LateParsedTemplate &LPT = *LPTMapEntry.second;
AddDeclRef(FD, Record);
AddDeclRef(LPT.D, Record);
Record.push_back(LPT.Toks.size());
for (const auto &Tok : LPT.Toks) {
AddToken(Tok, Record);
}
}
Stream.EmitRecord(LATE_PARSED_TEMPLATE, Record);
}
/// \brief Write the state of 'pragma clang optimize' at the end of the module.
void ASTWriter::WriteOptimizePragmaOptions(Sema &SemaRef) {
RecordData Record;
SourceLocation PragmaLoc = SemaRef.getOptimizeOffPragmaLocation();
AddSourceLocation(PragmaLoc, Record);
Stream.EmitRecord(OPTIMIZE_PRAGMA_OPTIONS, Record);
}
/// \brief Write the state of 'pragma ms_struct' at the end of the module.
void ASTWriter::WriteMSStructPragmaOptions(Sema &SemaRef) {
RecordData Record;
Record.push_back(SemaRef.MSStructPragmaOn ? PMSST_ON : PMSST_OFF);
Stream.EmitRecord(MSSTRUCT_PRAGMA_OPTIONS, Record);
}
/// \brief Write the state of 'pragma pointers_to_members' at the end of the
/// module.
void ASTWriter::WriteMSPointersToMembersPragmaOptions(Sema &SemaRef) {
RecordData Record;
Record.push_back(SemaRef.MSPointerToMemberRepresentationMethod);
AddSourceLocation(SemaRef.ImplicitMSInheritanceAttrLoc, Record);
Stream.EmitRecord(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS, Record);
}
/// \brief Write the state of 'pragma pack' at the end of the module.
void ASTWriter::WritePackPragmaOptions(Sema &SemaRef) {
// Don't serialize pragma pack state for modules, since it should only take
// effect on a per-submodule basis.
if (WritingModule)
return;
RecordData Record;
Record.push_back(SemaRef.PackStack.CurrentValue);
AddSourceLocation(SemaRef.PackStack.CurrentPragmaLocation, Record);
Record.push_back(SemaRef.PackStack.Stack.size());
for (const auto &StackEntry : SemaRef.PackStack.Stack) {
Record.push_back(StackEntry.Value);
AddSourceLocation(StackEntry.PragmaLocation, Record);
AddString(StackEntry.StackSlotLabel, Record);
}
Stream.EmitRecord(PACK_PRAGMA_OPTIONS, Record);
}
void ASTWriter::WriteModuleFileExtension(Sema &SemaRef,
ModuleFileExtensionWriter &Writer) {
// Enter the extension block.
Stream.EnterSubblock(EXTENSION_BLOCK_ID, 4);
// Emit the metadata record abbreviation.
auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(EXTENSION_METADATA));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
unsigned Abbrev = Stream.EmitAbbrev(std::move(Abv));
// Emit the metadata record.
RecordData Record;
auto Metadata = Writer.getExtension()->getExtensionMetadata();
Record.push_back(EXTENSION_METADATA);
Record.push_back(Metadata.MajorVersion);
Record.push_back(Metadata.MinorVersion);
Record.push_back(Metadata.BlockName.size());
Record.push_back(Metadata.UserInfo.size());
SmallString<64> Buffer;
Buffer += Metadata.BlockName;
Buffer += Metadata.UserInfo;
Stream.EmitRecordWithBlob(Abbrev, Record, Buffer);
// Emit the contents of the extension block.
Writer.writeExtensionContents(SemaRef, Stream);
// Exit the extension block.
Stream.ExitBlock();
}
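// For illustration: the metadata blob is BlockName immediately followed by
// UserInfo; the two sizes stored in the record are what let the reader split
// the concatenation back apart.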
//===----------------------------------------------------------------------===//
// General Serialization Routines
//===----------------------------------------------------------------------===//
/// \brief Emit the list of attributes to the specified record.
void ASTRecordWriter::AddAttributes(ArrayRef<const Attr *> Attrs) {
auto &Record = *this;
Record.push_back(Attrs.size());
for (const auto *A : Attrs) {
Record.push_back(A->getKind()); // FIXME: stable encoding, target attrs
Record.AddSourceRange(A->getRange());
#include "clang/Serialization/AttrPCHWrite.inc"
}
}
void ASTWriter::AddToken(const Token &Tok, RecordDataImpl &Record) {
AddSourceLocation(Tok.getLocation(), Record);
Record.push_back(Tok.getLength());
// FIXME: When reading literal tokens, reconstruct the literal pointer
// if it is needed.
AddIdentifierRef(Tok.getIdentifierInfo(), Record);
// FIXME: Should translate token kind to a stable encoding.
Record.push_back(Tok.getKind());
// FIXME: Should translate token flags to a stable encoding.
Record.push_back(Tok.getFlags());
}
void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) {
Record.push_back(Str.size());
Record.insert(Record.end(), Str.begin(), Str.end());
}
bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) {
assert(Context && "should have context when outputting path");
bool Changed =
cleanPathForOutput(Context->getSourceManager().getFileManager(), Path);
// Remove a prefix to make the path relative, if relevant.
const char *PathBegin = Path.data();
const char *PathPtr =
adjustFilenameForRelocatableAST(PathBegin, BaseDirectory);
if (PathPtr != PathBegin) {
Path.erase(Path.begin(), Path.begin() + (PathPtr - PathBegin));
Changed = true;
}
return Changed;
}
void ASTWriter::AddPath(StringRef Path, RecordDataImpl &Record) {
SmallString<128> FilePath(Path);
PreparePathForOutput(FilePath);
AddString(FilePath, Record);
}
void ASTWriter::EmitRecordWithPath(unsigned Abbrev, RecordDataRef Record,
StringRef Path) {
SmallString<128> FilePath(Path);
PreparePathForOutput(FilePath);
Stream.EmitRecordWithBlob(Abbrev, Record, FilePath);
}
void ASTWriter::AddVersionTuple(const VersionTuple &Version,
RecordDataImpl &Record) {
Record.push_back(Version.getMajor());
if (Optional<unsigned> Minor = Version.getMinor())
Record.push_back(*Minor + 1);
else
Record.push_back(0);
if (Optional<unsigned> Subminor = Version.getSubminor())
Record.push_back(*Subminor + 1);
else
Record.push_back(0);
}
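// For illustration: version "3.2.1" is encoded as [3, 3, 2] (minor and
// subminor are stored plus one so that 0 can mean "absent"), while a bare
// major version "3" is encoded as [3, 0, 0].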
/// \brief Note that the identifier II occurs at the given offset
/// within the identifier table.
void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
IdentID ID = IdentifierIDs[II];
// Only store offsets new to this AST file. Other identifier names are looked
// up earlier in the chain and thus don't need an offset.
if (ID >= FirstIdentID)
IdentifierOffsets[ID - FirstIdentID] = Offset;
}
/// \brief Note that the selector Sel occurs at the given offset
/// within the method pool/selector table.
void ASTWriter::SetSelectorOffset(Selector Sel, uint32_t Offset) {
unsigned ID = SelectorIDs[Sel];
assert(ID && "Unknown selector");
// Don't record offsets for selectors that are also available in a different
// file.
if (ID < FirstSelectorID)
return;
SelectorOffsets[ID - FirstSelectorID] = Offset;
}
ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream,
SmallVectorImpl<char> &Buffer, MemoryBufferCache &PCMCache,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
bool IncludeTimestamps)
: Stream(Stream), Buffer(Buffer), PCMCache(PCMCache),
IncludeTimestamps(IncludeTimestamps) {
for (const auto &Ext : Extensions) {
if (auto Writer = Ext->createExtensionWriter(*this))
ModuleFileExtensionWriters.push_back(std::move(Writer));
}
}
ASTWriter::~ASTWriter() {
llvm::DeleteContainerSeconds(FileDeclIDs);
}
const LangOptions &ASTWriter::getLangOpts() const {
assert(WritingAST && "can't determine lang opts when not writing AST");
return Context->getLangOpts();
}
time_t ASTWriter::getTimestampForOutput(const FileEntry *E) const {
return IncludeTimestamps ? E->getModificationTime() : 0;
}
ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef,
const std::string &OutputFile,
Module *WritingModule, StringRef isysroot,
bool hasErrors) {
WritingAST = true;
ASTHasCompilerErrors = hasErrors;
// Emit the file header.
Stream.Emit((unsigned)'C', 8);
Stream.Emit((unsigned)'P', 8);
Stream.Emit((unsigned)'C', 8);
Stream.Emit((unsigned)'H', 8);
WriteBlockInfoBlock();
Context = &SemaRef.Context;
PP = &SemaRef.PP;
this->WritingModule = WritingModule;
ASTFileSignature Signature =
WriteASTCore(SemaRef, isysroot, OutputFile, WritingModule);
Context = nullptr;
PP = nullptr;
this->WritingModule = nullptr;
this->BaseDirectory.clear();
WritingAST = false;
if (SemaRef.Context.getLangOpts().ImplicitModules && WritingModule) {
// Construct MemoryBuffer and update buffer manager.
PCMCache.addBuffer(OutputFile,
llvm::MemoryBuffer::getMemBufferCopy(
StringRef(Buffer.begin(), Buffer.size())));
}
return Signature;
}
template<typename Vector>
static void AddLazyVectorDecls(ASTWriter &Writer, Vector &Vec,
ASTWriter::RecordData &Record) {
for (typename Vector::iterator I = Vec.begin(nullptr, true), E = Vec.end();
I != E; ++I) {
Writer.AddDeclRef(*I, Record);
}
}
ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
const std::string &OutputFile,
Module *WritingModule) {
using namespace llvm;
bool isModule = WritingModule != nullptr;
// Make sure that the AST reader knows to finalize itself.
if (Chain)
Chain->finalizeForWriting();
ASTContext &Context = SemaRef.Context;
Preprocessor &PP = SemaRef.PP;
// Set up predefined declaration IDs.
auto RegisterPredefDecl = [&] (Decl *D, PredefinedDeclIDs ID) {
if (D) {
assert(D->isCanonicalDecl() && "predefined decl is not canonical");
DeclIDs[D] = ID;
}
};
RegisterPredefDecl(Context.getTranslationUnitDecl(),
PREDEF_DECL_TRANSLATION_UNIT_ID);
RegisterPredefDecl(Context.ObjCIdDecl, PREDEF_DECL_OBJC_ID_ID);
RegisterPredefDecl(Context.ObjCSelDecl, PREDEF_DECL_OBJC_SEL_ID);
RegisterPredefDecl(Context.ObjCClassDecl, PREDEF_DECL_OBJC_CLASS_ID);
RegisterPredefDecl(Context.ObjCProtocolClassDecl,
PREDEF_DECL_OBJC_PROTOCOL_ID);
RegisterPredefDecl(Context.Int128Decl, PREDEF_DECL_INT_128_ID);
RegisterPredefDecl(Context.UInt128Decl, PREDEF_DECL_UNSIGNED_INT_128_ID);
RegisterPredefDecl(Context.ObjCInstanceTypeDecl,
PREDEF_DECL_OBJC_INSTANCETYPE_ID);
RegisterPredefDecl(Context.BuiltinVaListDecl, PREDEF_DECL_BUILTIN_VA_LIST_ID);
RegisterPredefDecl(Context.VaListTagDecl, PREDEF_DECL_VA_LIST_TAG);
RegisterPredefDecl(Context.BuiltinMSVaListDecl,
PREDEF_DECL_BUILTIN_MS_VA_LIST_ID);
RegisterPredefDecl(Context.ExternCContext, PREDEF_DECL_EXTERN_C_CONTEXT_ID);
RegisterPredefDecl(Context.MakeIntegerSeqDecl,
PREDEF_DECL_MAKE_INTEGER_SEQ_ID);
RegisterPredefDecl(Context.CFConstantStringTypeDecl,
PREDEF_DECL_CF_CONSTANT_STRING_ID);
RegisterPredefDecl(Context.CFConstantStringTagDecl,
PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID);
RegisterPredefDecl(Context.TypePackElementDecl,
PREDEF_DECL_TYPE_PACK_ELEMENT_ID);
// Build a record containing all of the tentative definitions in this file, in
// TentativeDefinitions order. Generally, this record will be empty for
// headers.
RecordData TentativeDefinitions;
AddLazyVectorDecls(*this, SemaRef.TentativeDefinitions, TentativeDefinitions);
// Build a record containing all of the file scoped decls in this file.
RecordData UnusedFileScopedDecls;
if (!isModule)
AddLazyVectorDecls(*this, SemaRef.UnusedFileScopedDecls,
UnusedFileScopedDecls);
// Build a record containing all of the delegating constructors we still need
// to resolve.
RecordData DelegatingCtorDecls;
if (!isModule)
AddLazyVectorDecls(*this, SemaRef.DelegatingCtorDecls, DelegatingCtorDecls);
// Write the set of weak, undeclared identifiers. We always write the
// entire table, since later PCH files in a PCH chain are only interested in
// the results at the end of the chain.
RecordData WeakUndeclaredIdentifiers;
for (auto &WeakUndeclaredIdentifier : SemaRef.WeakUndeclaredIdentifiers) {
IdentifierInfo *II = WeakUndeclaredIdentifier.first;
WeakInfo &WI = WeakUndeclaredIdentifier.second;
AddIdentifierRef(II, WeakUndeclaredIdentifiers);
AddIdentifierRef(WI.getAlias(), WeakUndeclaredIdentifiers);
AddSourceLocation(WI.getLocation(), WeakUndeclaredIdentifiers);
WeakUndeclaredIdentifiers.push_back(WI.getUsed());
}
// Build a record containing all of the ext_vector declarations.
RecordData ExtVectorDecls;
AddLazyVectorDecls(*this, SemaRef.ExtVectorDecls, ExtVectorDecls);
// Build a record containing all of the VTable uses information.
RecordData VTableUses;
if (!SemaRef.VTableUses.empty()) {
for (unsigned I = 0, N = SemaRef.VTableUses.size(); I != N; ++I) {
AddDeclRef(SemaRef.VTableUses[I].first, VTableUses);
AddSourceLocation(SemaRef.VTableUses[I].second, VTableUses);
VTableUses.push_back(SemaRef.VTablesUsed[SemaRef.VTableUses[I].first]);
}
}
// Build a record containing all of the UnusedLocalTypedefNameCandidates.
RecordData UnusedLocalTypedefNameCandidates;
for (const TypedefNameDecl *TD : SemaRef.UnusedLocalTypedefNameCandidates)
AddDeclRef(TD, UnusedLocalTypedefNameCandidates);
// Build a record containing all of the pending implicit instantiations.
RecordData PendingInstantiations;
for (const auto &I : SemaRef.PendingInstantiations) {
AddDeclRef(I.first, PendingInstantiations);
AddSourceLocation(I.second, PendingInstantiations);
}
assert(SemaRef.PendingLocalImplicitInstantiations.empty() &&
"There are local ones at end of translation unit!");
// Build a record containing some declaration references.
RecordData SemaDeclRefs;
if (SemaRef.StdNamespace || SemaRef.StdBadAlloc || SemaRef.StdAlignValT) {
AddDeclRef(SemaRef.getStdNamespace(), SemaDeclRefs);
AddDeclRef(SemaRef.getStdBadAlloc(), SemaDeclRefs);
AddDeclRef(SemaRef.getStdAlignValT(), SemaDeclRefs);
}
RecordData CUDASpecialDeclRefs;
if (Context.getcudaConfigureCallDecl()) {
AddDeclRef(Context.getcudaConfigureCallDecl(), CUDASpecialDeclRefs);
}
// Build a record containing all of the known namespaces.
RecordData KnownNamespaces;
for (const auto &I : SemaRef.KnownNamespaces) {
if (!I.second)
AddDeclRef(I.first, KnownNamespaces);
}
// Build a record of all used, undefined objects that require definitions.
RecordData UndefinedButUsed;
SmallVector<std::pair<NamedDecl *, SourceLocation>, 16> Undefined;
SemaRef.getUndefinedButUsed(Undefined);
for (const auto &I : Undefined) {
AddDeclRef(I.first, UndefinedButUsed);
AddSourceLocation(I.second, UndefinedButUsed);
}
// Build a record containing all delete-expressions that we would like to
// analyze later in AST.
RecordData DeleteExprsToAnalyze;
for (const auto &DeleteExprsInfo :
SemaRef.getMismatchingDeleteExpressions()) {
AddDeclRef(DeleteExprsInfo.first, DeleteExprsToAnalyze);
DeleteExprsToAnalyze.push_back(DeleteExprsInfo.second.size());
for (const auto &DeleteLoc : DeleteExprsInfo.second) {
AddSourceLocation(DeleteLoc.first, DeleteExprsToAnalyze);
DeleteExprsToAnalyze.push_back(DeleteLoc.second);
}
}
// Write the control block
WriteControlBlock(PP, Context, isysroot, OutputFile);
// Write the remaining AST contents.
Stream.EnterSubblock(AST_BLOCK_ID, 5);
// This is so that older clang versions, before the introduction
// of the control block, can read and reject the newer PCH format.
{
RecordData Record = {VERSION_MAJOR};
Stream.EmitRecord(METADATA_OLD_FORMAT, Record);
}
// Create a lexical update block containing all of the declarations in the
// translation unit that do not come from other AST files.
const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
SmallVector<uint32_t, 128> NewGlobalKindDeclPairs;
for (const auto *D : TU->noload_decls()) {
if (!D->isFromASTFile()) {
NewGlobalKindDeclPairs.push_back(D->getKind());
NewGlobalKindDeclPairs.push_back(GetDeclRef(D));
}
}
auto Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv));
{
RecordData::value_type Record[] = {TU_UPDATE_LEXICAL};
Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record,
bytes(NewGlobalKindDeclPairs));
}
// And a visible updates block for the translation unit.
Abv = std::make_shared<BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob));
UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv));
WriteDeclContextVisibleUpdate(TU);
// If we have any extern "C" names, write out a visible update for them.
if (Context.ExternCContext)
WriteDeclContextVisibleUpdate(Context.ExternCContext);
// If the translation unit has an anonymous namespace, and we don't already
// have an update block for it, write it as an update block.
// FIXME: Why do we not do this if there's already an update block?
if (NamespaceDecl *NS = TU->getAnonymousNamespace()) {
ASTWriter::UpdateRecord &Record = DeclUpdates[TU];
if (Record.empty())
Record.push_back(DeclUpdate(UPD_CXX_ADDED_ANONYMOUS_NAMESPACE, NS));
}
// Add update records for all mangling numbers and static local numbers.
// These aren't really update records, but this is a convenient way of
// tagging this rare extra data onto the declarations.
for (const auto &Number : Context.MangleNumbers)
if (!Number.first->isFromASTFile())
DeclUpdates[Number.first].push_back(DeclUpdate(UPD_MANGLING_NUMBER,
Number.second));
for (const auto &Number : Context.StaticLocalNumbers)
if (!Number.first->isFromASTFile())
DeclUpdates[Number.first].push_back(DeclUpdate(UPD_STATIC_LOCAL_NUMBER,
Number.second));
// Make sure visible decls, added to DeclContexts previously loaded from
// an AST file, are registered for serialization. Likewise for template
// specializations added to imported templates.
for (const auto *I : DeclsToEmitEvenIfUnreferenced) {
GetDeclRef(I);
}
// Make sure all decls associated with an identifier are registered for
// serialization, if we're storing decls with identifiers.
if (!WritingModule || !getLangOpts().CPlusPlus) {
llvm::SmallVector<const IdentifierInfo*, 256> IIs;
for (const auto &ID : PP.getIdentifierTable()) {
const IdentifierInfo *II = ID.second;
if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization())
IIs.push_back(II);
}
// Sort the identifiers to visit based on their name.
std::sort(IIs.begin(), IIs.end(), llvm::less_ptr<IdentifierInfo>());
for (const IdentifierInfo *II : IIs) {
for (IdentifierResolver::iterator D = SemaRef.IdResolver.begin(II),
DEnd = SemaRef.IdResolver.end();
D != DEnd; ++D) {
GetDeclRef(*D);
}
}
}
// For the method pool in the module, if it contains an entry for a selector,
// the entry should be complete, containing everything introduced by that
// module and all of the modules it imports. The entry may be out of date, so
// we need to pull in the new content here.
// Because updateOutOfDateSelector can itself update SelectorIDs, we copy all
// selectors out first, to be safe.
llvm::SmallVector<Selector, 256> AllSelectors;
for (auto &SelectorAndID : SelectorIDs)
AllSelectors.push_back(SelectorAndID.first);
for (auto &Selector : AllSelectors)
SemaRef.updateOutOfDateSelector(Selector);
// Form the record of special types.
RecordData SpecialTypes;
AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes);
AddTypeRef(Context.getFILEType(), SpecialTypes);
AddTypeRef(Context.getjmp_bufType(), SpecialTypes);
AddTypeRef(Context.getsigjmp_bufType(), SpecialTypes);
AddTypeRef(Context.ObjCIdRedefinitionType, SpecialTypes);
AddTypeRef(Context.ObjCClassRedefinitionType, SpecialTypes);
AddTypeRef(Context.ObjCSelRedefinitionType, SpecialTypes);
AddTypeRef(Context.getucontext_tType(), SpecialTypes);
if (Chain) {
// Write the mapping information describing our module dependencies and how
// each of those modules was mapped into our own offset/ID space, so that
// the reader can build the appropriate mapping to its own offset/ID space.
// The map consists solely of a blob with the following format, matching the
// writeBaseIDOrNone calls below:
// *(module-name-len:i16 module-name:len*i8
// source-location-offset:i32
// identifier-id:i32
// macro-id:i32
// preprocessed-entity-id:i32
// submodule-id:i32
// selector-id:i32
// declaration-id:i32
// type-id:i32)
//
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned ModuleOffsetMapAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
SmallString<2048> Buffer;
{
llvm::raw_svector_ostream Out(Buffer);
for (ModuleFile &M : Chain->ModuleMgr) {
using namespace llvm::support;
endian::Writer<little> LE(Out);
StringRef FileName = M.FileName;
LE.write<uint16_t>(FileName.size());
Out.write(FileName.data(), FileName.size());
// Note: if a base ID was uint max, it would not be possible to load
// another module after it or have more than one entity inside it.
uint32_t None = std::numeric_limits<uint32_t>::max();
auto writeBaseIDOrNone = [&](uint32_t BaseID, bool ShouldWrite) {
assert(BaseID < std::numeric_limits<uint32_t>::max() && "base id too high");
if (ShouldWrite)
LE.write<uint32_t>(BaseID);
else
LE.write<uint32_t>(None);
};
// These values should be unique within a chain, since they will be read
// as keys into ContinuousRangeMaps.
writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries);
writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers);
writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros);
writeBaseIDOrNone(M.BasePreprocessedEntityID,
M.NumPreprocessedEntities);
writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules);
writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors);
writeBaseIDOrNone(M.BaseDeclID, M.LocalNumDecls);
writeBaseIDOrNone(M.BaseTypeIndex, M.LocalNumTypes);
}
}
RecordData::value_type Record[] = {MODULE_OFFSET_MAP};
Stream.EmitRecordWithBlob(ModuleOffsetMapAbbrev, Record,
Buffer.data(), Buffer.size());
}
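// A minimal reader-side sketch for the blob emitted above (hypothetical
// helper; the real decoding lives in ASTReader). All multi-byte fields
// are little-endian, and ~0u marks a base ID that was not written.
static void parseModuleOffsetMapEntry(const unsigned char *&Cursor) {
  using namespace llvm::support;
  // module-name-len:i16 followed by the name bytes.
  uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Cursor);
  llvm::StringRef Name(reinterpret_cast<const char *>(Cursor), Len);
  Cursor += Len;
  // Eight base IDs follow, in the order written above: source location
  // offset, identifier, macro, preprocessed entity, submodule, selector,
  // declaration, and type.
  for (unsigned I = 0; I != 8; ++I)
    (void)endian::readNext<uint32_t, little, unaligned>(Cursor);
  (void)Name; // a real reader would key its remap tables by Name
}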
RecordData DeclUpdatesOffsetsRecord;
// Keep writing types, declarations, and declaration update records
// until we've emitted all of them.
Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5);
WriteTypeAbbrevs();
WriteDeclAbbrevs();
do {
WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord);
while (!DeclTypesToEmit.empty()) {
DeclOrType DOT = DeclTypesToEmit.front();
DeclTypesToEmit.pop();
if (DOT.isType())
WriteType(DOT.getType());
else
WriteDecl(Context, DOT.getDecl());
}
} while (!DeclUpdates.empty());
Stream.ExitBlock();
DoneWritingDeclsAndTypes = true;
// These things can only be done once we've written out decls and types.
WriteTypeDeclOffsets();
if (!DeclUpdatesOffsetsRecord.empty())
Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord);
WriteFileDeclIDsMap();
WriteSourceManagerBlock(Context.getSourceManager(), PP);
WriteComments();
WritePreprocessor(PP, isModule);
WriteHeaderSearch(PP.getHeaderSearchInfo());
WriteSelectors(SemaRef);
WriteReferencedSelectorsPool(SemaRef);
WriteLateParsedTemplates(SemaRef);
WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
WriteFPPragmaOptions(SemaRef.getFPOptions());
WriteOpenCLExtensions(SemaRef);
WriteOpenCLExtensionTypes(SemaRef);
WriteOpenCLExtensionDecls(SemaRef);
WriteCUDAPragmas(SemaRef);
// If we're emitting a module, write out the submodule information.
if (WritingModule)
WriteSubmodules(WritingModule);
Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes);
// Write the record containing external, unnamed definitions.
if (!EagerlyDeserializedDecls.empty())
Stream.EmitRecord(EAGERLY_DESERIALIZED_DECLS, EagerlyDeserializedDecls);
if (!ModularCodegenDecls.empty())
Stream.EmitRecord(MODULAR_CODEGEN_DECLS, ModularCodegenDecls);
// Write the record containing tentative definitions.
if (!TentativeDefinitions.empty())
Stream.EmitRecord(TENTATIVE_DEFINITIONS, TentativeDefinitions);
// Write the record containing unused file scoped decls.
if (!UnusedFileScopedDecls.empty())
Stream.EmitRecord(UNUSED_FILESCOPED_DECLS, UnusedFileScopedDecls);
// Write the record containing weak undeclared identifiers.
if (!WeakUndeclaredIdentifiers.empty())
Stream.EmitRecord(WEAK_UNDECLARED_IDENTIFIERS,
WeakUndeclaredIdentifiers);
// Write the record containing ext_vector type names.
if (!ExtVectorDecls.empty())
Stream.EmitRecord(EXT_VECTOR_DECLS, ExtVectorDecls);
// Write the record containing VTable uses information.
if (!VTableUses.empty())
Stream.EmitRecord(VTABLE_USES, VTableUses);
// Write the record containing potentially unused local typedefs.
if (!UnusedLocalTypedefNameCandidates.empty())
Stream.EmitRecord(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES,
UnusedLocalTypedefNameCandidates);
// Write the record containing pending implicit instantiations.
if (!PendingInstantiations.empty())
Stream.EmitRecord(PENDING_IMPLICIT_INSTANTIATIONS, PendingInstantiations);
// Write the record containing declaration references of Sema.
if (!SemaDeclRefs.empty())
Stream.EmitRecord(SEMA_DECL_REFS, SemaDeclRefs);
// Write the record containing CUDA-specific declaration references.
if (!CUDASpecialDeclRefs.empty())
Stream.EmitRecord(CUDA_SPECIAL_DECL_REFS, CUDASpecialDeclRefs);
// Write the delegating constructors.
if (!DelegatingCtorDecls.empty())
Stream.EmitRecord(DELEGATING_CTORS, DelegatingCtorDecls);
// Write the known namespaces.
if (!KnownNamespaces.empty())
Stream.EmitRecord(KNOWN_NAMESPACES, KnownNamespaces);
// Write the undefined internal functions and variables, and inline functions.
if (!UndefinedButUsed.empty())
Stream.EmitRecord(UNDEFINED_BUT_USED, UndefinedButUsed);
if (!DeleteExprsToAnalyze.empty())
Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze);
// Write the visible updates to DeclContexts.
for (auto *DC : UpdatedDeclContexts)
WriteDeclContextVisibleUpdate(DC);
if (!WritingModule) {
// Write the submodules that were imported, if any.
struct ModuleInfo {
uint64_t ID;
Module *M;
ModuleInfo(uint64_t ID, Module *M) : ID(ID), M(M) {}
};
llvm::SmallVector<ModuleInfo, 64> Imports;
for (const auto *I : Context.local_imports()) {
assert(SubmoduleIDs.find(I->getImportedModule()) != SubmoduleIDs.end());
Imports.push_back(ModuleInfo(SubmoduleIDs[I->getImportedModule()],
I->getImportedModule()));
}
if (!Imports.empty()) {
auto Cmp = [](const ModuleInfo &A, const ModuleInfo &B) {
return A.ID < B.ID;
};
auto Eq = [](const ModuleInfo &A, const ModuleInfo &B) {
return A.ID == B.ID;
};
// Sort and deduplicate module IDs.
std::sort(Imports.begin(), Imports.end(), Cmp);
Imports.erase(std::unique(Imports.begin(), Imports.end(), Eq),
Imports.end());
RecordData ImportedModules;
for (const auto &Import : Imports) {
ImportedModules.push_back(Import.ID);
// FIXME: If the module has macros imported then later has declarations
// imported, this location won't be the right one as a location for the
// declaration imports.
AddSourceLocation(PP.getModuleImportLoc(Import.M), ImportedModules);
}
Stream.EmitRecord(IMPORTED_MODULES, ImportedModules);
}
}
WriteObjCCategories();
if(!WritingModule) {
WriteOptimizePragmaOptions(SemaRef);
WriteMSStructPragmaOptions(SemaRef);
WriteMSPointersToMembersPragmaOptions(SemaRef);
}
WritePackPragmaOptions(SemaRef);
// Some simple statistics
RecordData::value_type Record[] = {
NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts};
Stream.EmitRecord(STATISTICS, Record);
Stream.ExitBlock();
// Write the module file extension blocks.
for (const auto &ExtWriter : ModuleFileExtensionWriters)
WriteModuleFileExtension(SemaRef, *ExtWriter);
return writeUnhashedControlBlock(PP, Context);
}
void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
if (DeclUpdates.empty())
return;
DeclUpdateMap LocalUpdates;
LocalUpdates.swap(DeclUpdates);
for (auto &DeclUpdate : LocalUpdates) {
const Decl *D = DeclUpdate.first;
bool HasUpdatedBody = false;
RecordData RecordData;
ASTRecordWriter Record(*this, RecordData);
for (auto &Update : DeclUpdate.second) {
DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind();
// An updated body is emitted last, so that the reader doesn't need
// to skip over the lazy body to reach statements for other records.
if (Kind == UPD_CXX_ADDED_FUNCTION_DEFINITION)
HasUpdatedBody = true;
else
Record.push_back(Kind);
switch (Kind) {
case UPD_CXX_ADDED_IMPLICIT_MEMBER:
case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE:
assert(Update.getDecl() && "no decl to add?");
Record.push_back(GetDeclRef(Update.getDecl()));
break;
case UPD_CXX_ADDED_FUNCTION_DEFINITION:
break;
case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER: {
const VarDecl *VD = cast<VarDecl>(D);
Record.AddSourceLocation(Update.getLoc());
if (VD->getInit()) {
Record.push_back(!VD->isInitKnownICE() ? 1
: (VD->isInitICE() ? 3 : 2));
Record.AddStmt(const_cast<Expr*>(VD->getInit()));
} else {
Record.push_back(0);
}
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT:
Record.AddStmt(const_cast<Expr *>(
cast<ParmVarDecl>(Update.getDecl())->getDefaultArg()));
break;
case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER:
Record.AddStmt(
cast<FieldDecl>(Update.getDecl())->getInClassInitializer());
break;
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
UpdatedDeclContexts.insert(RD->getPrimaryContext());
Record.AddCXXDefinitionData(RD);
Record.AddOffset(WriteDeclContextLexicalBlock(
*Context, const_cast<CXXRecordDecl *>(RD)));
// This state is sometimes updated by template instantiation, when we
// switch from the specialization referring to the template declaration
// to it referring to the template definition.
if (auto *MSInfo = RD->getMemberSpecializationInfo()) {
Record.push_back(MSInfo->getTemplateSpecializationKind());
Record.AddSourceLocation(MSInfo->getPointOfInstantiation());
} else {
auto *Spec = cast<ClassTemplateSpecializationDecl>(RD);
Record.push_back(Spec->getTemplateSpecializationKind());
Record.AddSourceLocation(Spec->getPointOfInstantiation());
// The instantiation might have been resolved to a partial
// specialization. If so, record which one.
auto From = Spec->getInstantiatedFrom();
if (auto PartialSpec =
From.dyn_cast<ClassTemplatePartialSpecializationDecl*>()) {
Record.push_back(true);
Record.AddDeclRef(PartialSpec);
Record.AddTemplateArgumentList(
&Spec->getTemplateInstantiationArgs());
} else {
Record.push_back(false);
}
}
Record.push_back(RD->getTagKind());
Record.AddSourceLocation(RD->getLocation());
Record.AddSourceLocation(RD->getLocStart());
Record.AddSourceRange(RD->getBraceRange());
// Instantiation may change attributes; write them all out afresh.
Record.push_back(D->hasAttrs());
if (D->hasAttrs())
Record.AddAttributes(D->getAttrs());
// FIXME: Ensure we don't get here for explicit instantiations.
break;
}
case UPD_CXX_RESOLVED_DTOR_DELETE:
Record.AddDeclRef(Update.getDecl());
break;
case UPD_CXX_RESOLVED_EXCEPTION_SPEC:
addExceptionSpec(
cast<FunctionDecl>(D)->getType()->castAs<FunctionProtoType>(),
Record);
break;
case UPD_CXX_DEDUCED_RETURN_TYPE:
Record.push_back(GetOrCreateTypeID(Update.getType()));
break;
case UPD_DECL_MARKED_USED:
break;
case UPD_MANGLING_NUMBER:
case UPD_STATIC_LOCAL_NUMBER:
Record.push_back(Update.getNumber());
break;
case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
Record.AddSourceRange(
D->getAttr<OMPThreadPrivateDeclAttr>()->getRange());
break;
case UPD_DECL_MARKED_OPENMP_DECLARETARGET:
Record.AddSourceRange(
D->getAttr<OMPDeclareTargetDeclAttr>()->getRange());
break;
case UPD_DECL_EXPORTED:
Record.push_back(getSubmoduleID(Update.getModule()));
break;
case UPD_ADDED_ATTR_TO_RECORD:
Record.AddAttributes(llvm::makeArrayRef(Update.getAttr()));
break;
}
}
if (HasUpdatedBody) {
const auto *Def = cast<FunctionDecl>(D);
Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION);
Record.push_back(Def->isInlined());
Record.AddSourceLocation(Def->getInnerLocStart());
Record.AddFunctionDefinition(Def);
}
OffsetsRecord.push_back(GetDeclRef(D));
OffsetsRecord.push_back(Record.Emit(DECL_UPDATES));
}
}
void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record) {
uint32_t Raw = Loc.getRawEncoding();
Record.push_back((Raw << 1) | (Raw >> 31));
}
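// A minimal sketch of the inverse transform (hypothetical helper; the
// real decoding lives in ASTReader). The rotation above moves the high
// "macro expansion" bit of the raw encoding into the low bit, so common
// file locations stay small and VBR-encode compactly.
static SourceLocation readRawSourceLocation(uint32_t Stored) {
  uint32_t Raw = (Stored >> 1) | (Stored << 31); // rotate right by one
  return SourceLocation::getFromRawEncoding(Raw);
}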
void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) {
AddSourceLocation(Range.getBegin(), Record);
AddSourceLocation(Range.getEnd(), Record);
}
void ASTRecordWriter::AddAPInt(const llvm::APInt &Value) {
Record->push_back(Value.getBitWidth());
const uint64_t *Words = Value.getRawData();
Record->append(Words, Words + Value.getNumWords());
}
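// A minimal reader-side sketch of the layout emitted above (hypothetical
// helper): one record entry for the bit width, then one entry per 64-bit
// word of the value.
static llvm::APInt readAPInt(llvm::ArrayRef<uint64_t> &Record) {
  unsigned BitWidth = Record[0];
  unsigned NumWords = (BitWidth + 63) / 64; // matches APInt::getNumWords()
  llvm::APInt Value(BitWidth,
                    llvm::makeArrayRef(Record.data() + 1, NumWords));
  Record = Record.drop_front(1 + NumWords); // consume what we read
  return Value;
}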
void ASTRecordWriter::AddAPSInt(const llvm::APSInt &Value) {
Record->push_back(Value.isUnsigned());
AddAPInt(Value);
}
void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) {
AddAPInt(Value.bitcastToAPInt());
}
void ASTWriter::AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record) {
Record.push_back(getIdentifierRef(II));
}
IdentID ASTWriter::getIdentifierRef(const IdentifierInfo *II) {
if (!II)
return 0;
IdentID &ID = IdentifierIDs[II];
if (ID == 0)
ID = NextIdentID++;
return ID;
}
MacroID ASTWriter::getMacroRef(MacroInfo *MI, const IdentifierInfo *Name) {
// Don't emit builtin macros like __LINE__ to the AST file unless they
// have been redefined by the header (in which case they are not
// isBuiltinMacro).
if (!MI || MI->isBuiltinMacro())
return 0;
MacroID &ID = MacroIDs[MI];
if (ID == 0) {
ID = NextMacroID++;
MacroInfoToEmitData Info = { Name, MI, ID };
MacroInfosToEmit.push_back(Info);
}
return ID;
}
MacroID ASTWriter::getMacroID(MacroInfo *MI) {
if (!MI || MI->isBuiltinMacro())
return 0;
assert(MacroIDs.find(MI) != MacroIDs.end() && "Macro not emitted!");
return MacroIDs[MI];
}
uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) {
return IdentMacroDirectivesOffsetMap.lookup(Name);
}
void ASTRecordWriter::AddSelectorRef(const Selector SelRef) {
Record->push_back(Writer->getSelectorRef(SelRef));
}
SelectorID ASTWriter::getSelectorRef(Selector Sel) {
if (Sel.getAsOpaquePtr() == nullptr) {
return 0;
}
SelectorID SID = SelectorIDs[Sel];
if (SID == 0 && Chain) {
// This might trigger a ReadSelector callback, which will set the ID for
// this selector.
Chain->LoadSelector(Sel);
SID = SelectorIDs[Sel];
}
if (SID == 0) {
SID = NextSelectorID++;
SelectorIDs[Sel] = SID;
}
return SID;
}
void ASTRecordWriter::AddCXXTemporary(const CXXTemporary *Temp) {
AddDeclRef(Temp->getDestructor());
}
void ASTRecordWriter::AddTemplateArgumentLocInfo(
TemplateArgument::ArgKind Kind, const TemplateArgumentLocInfo &Arg) {
switch (Kind) {
case TemplateArgument::Expression:
AddStmt(Arg.getAsExpr());
break;
case TemplateArgument::Type:
AddTypeSourceInfo(Arg.getAsTypeSourceInfo());
break;
case TemplateArgument::Template:
AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc());
AddSourceLocation(Arg.getTemplateNameLoc());
break;
case TemplateArgument::TemplateExpansion:
AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc());
AddSourceLocation(Arg.getTemplateNameLoc());
AddSourceLocation(Arg.getTemplateEllipsisLoc());
break;
case TemplateArgument::Null:
case TemplateArgument::Integral:
case TemplateArgument::Declaration:
case TemplateArgument::NullPtr:
case TemplateArgument::Pack:
// FIXME: Is this right?
break;
}
}
void ASTRecordWriter::AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg) {
AddTemplateArgument(Arg.getArgument());
if (Arg.getArgument().getKind() == TemplateArgument::Expression) {
bool InfoHasSameExpr
= Arg.getArgument().getAsExpr() == Arg.getLocInfo().getAsExpr();
Record->push_back(InfoHasSameExpr);
if (InfoHasSameExpr)
return; // Avoid storing the same expr twice.
}
AddTemplateArgumentLocInfo(Arg.getArgument().getKind(), Arg.getLocInfo());
}
void ASTRecordWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo) {
if (!TInfo) {
AddTypeRef(QualType());
return;
}
AddTypeLoc(TInfo->getTypeLoc());
}
void ASTRecordWriter::AddTypeLoc(TypeLoc TL) {
AddTypeRef(TL.getType());
TypeLocWriter TLW(*this);
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
TLW.Visit(TL);
}
void ASTWriter::AddTypeRef(QualType T, RecordDataImpl &Record) {
Record.push_back(GetOrCreateTypeID(T));
}
TypeID ASTWriter::GetOrCreateTypeID(QualType T) {
assert(Context);
return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx {
if (T.isNull())
return TypeIdx();
assert(!T.getLocalFastQualifiers());
TypeIdx &Idx = TypeIdxs[T];
if (Idx.getIndex() == 0) {
if (DoneWritingDeclsAndTypes) {
assert(0 && "New type seen after serializing all the types to emit!");
return TypeIdx();
}
// We haven't seen this type before. Assign it a new ID and put it
// into the queue of types to emit.
Idx = TypeIdx(NextTypeID++);
DeclTypesToEmit.push(T);
}
return Idx;
});
}
TypeID ASTWriter::getTypeID(QualType T) const {
assert(Context);
return MakeTypeID(*Context, T, [&](QualType T) -> TypeIdx {
if (T.isNull())
return TypeIdx();
assert(!T.getLocalFastQualifiers());
TypeIdxMap::const_iterator I = TypeIdxs.find(T);
assert(I != TypeIdxs.end() && "Type not emitted!");
return I->second;
});
}
void ASTWriter::AddDeclRef(const Decl *D, RecordDataImpl &Record) {
Record.push_back(GetDeclRef(D));
}
DeclID ASTWriter::GetDeclRef(const Decl *D) {
assert(WritingAST && "Cannot request a declaration ID before AST writing");
if (!D) {
return 0;
}
// If D comes from an AST file, its declaration ID is already known and
// fixed.
if (D->isFromASTFile())
return D->getGlobalID();
assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
DeclID &ID = DeclIDs[D];
if (ID == 0) {
if (DoneWritingDeclsAndTypes) {
assert(0 && "New decl seen after serializing all the decls to emit!");
return 0;
}
// We haven't seen this declaration before. Give it a new ID and
// enqueue it in the list of declarations to emit.
ID = NextDeclID++;
DeclTypesToEmit.push(const_cast<Decl *>(D));
}
return ID;
}
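// The assign-on-first-use idiom above (also used by getIdentifierRef,
// getMacroRef, and getSelectorRef) relies on DenseMap::operator[]
// value-initializing missing entries to 0, so one lookup both tests for
// and creates the slot. A generic sketch of the pattern:
template <typename KeyT>
static unsigned getOrAssignID(llvm::DenseMap<KeyT, unsigned> &Map, KeyT Key,
                              unsigned &NextID) {
  unsigned &ID = Map[Key]; // inserts 0 if Key was absent
  if (ID == 0)
    ID = NextID++; // IDs start at 1; 0 stays reserved for "null"
  return ID;
}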
DeclID ASTWriter::getDeclID(const Decl *D) {
if (!D)
return 0;
// If D comes from an AST file, its declaration ID is already known and
// fixed.
if (D->isFromASTFile())
return D->getGlobalID();
assert(DeclIDs.find(D) != DeclIDs.end() && "Declaration not emitted!");
return DeclIDs[D];
}
void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) {
assert(ID);
assert(D);
SourceLocation Loc = D->getLocation();
if (Loc.isInvalid())
return;
// We only keep track of the file-level declarations of each file.
if (!D->getLexicalDeclContext()->isFileContext())
return;
// FIXME: ParmVarDecls that are part of a function type of a parameter of
// a function/objc method, should not have TU as lexical context.
if (isa<ParmVarDecl>(D))
return;
SourceManager &SM = Context->getSourceManager();
SourceLocation FileLoc = SM.getFileLoc(Loc);
assert(SM.isLocalSourceLocation(FileLoc));
FileID FID;
unsigned Offset;
std::tie(FID, Offset) = SM.getDecomposedLoc(FileLoc);
if (FID.isInvalid())
return;
assert(SM.getSLocEntry(FID).isFile());
DeclIDInFileInfo *&Info = FileDeclIDs[FID];
if (!Info)
Info = new DeclIDInFileInfo();
std::pair<unsigned, serialization::DeclID> LocDecl(Offset, ID);
LocDeclIDsTy &Decls = Info->DeclIDs;
if (Decls.empty() || Decls.back().first <= Offset) {
Decls.push_back(LocDecl);
return;
}
LocDeclIDsTy::iterator I =
std::upper_bound(Decls.begin(), Decls.end(), LocDecl, llvm::less_first());
Decls.insert(I, LocDecl);
}
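// A minimal standalone sketch of the insertion strategy above
// (hypothetical types): declarations are usually visited in source
// order, so the O(1) append is the common path and the binary-search
// insert only runs for out-of-order locations.
static void insertSorted(std::vector<std::pair<unsigned, unsigned>> &Decls,
                         std::pair<unsigned, unsigned> LocDecl) {
  if (Decls.empty() || Decls.back().first <= LocDecl.first) {
    Decls.push_back(LocDecl); // common case: already in order
    return;
  }
  Decls.insert(std::upper_bound(Decls.begin(), Decls.end(), LocDecl,
                                llvm::less_first()),
               LocDecl);
}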
void ASTRecordWriter::AddDeclarationName(DeclarationName Name) {
// FIXME: Emit a stable enum for NameKind. 0 = Identifier etc.
Record->push_back(Name.getNameKind());
switch (Name.getNameKind()) {
case DeclarationName::Identifier:
AddIdentifierRef(Name.getAsIdentifierInfo());
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
AddSelectorRef(Name.getObjCSelector());
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
AddTypeRef(Name.getCXXNameType());
break;
case DeclarationName::CXXDeductionGuideName:
AddDeclRef(Name.getCXXDeductionGuideTemplate());
break;
case DeclarationName::CXXOperatorName:
Record->push_back(Name.getCXXOverloadedOperator());
break;
case DeclarationName::CXXLiteralOperatorName:
AddIdentifierRef(Name.getCXXLiteralIdentifier());
break;
case DeclarationName::CXXUsingDirective:
// No extra data to emit
break;
}
}
unsigned ASTWriter::getAnonymousDeclarationNumber(const NamedDecl *D) {
assert(needsAnonymousDeclarationNumber(D) &&
"expected an anonymous declaration");
// Number the anonymous declarations within this context, if we've not
// already done so.
auto It = AnonymousDeclarationNumbers.find(D);
if (It == AnonymousDeclarationNumbers.end()) {
auto *DC = D->getLexicalDeclContext();
numberAnonymousDeclsWithin(DC, [&](const NamedDecl *ND, unsigned Number) {
AnonymousDeclarationNumbers[ND] = Number;
});
It = AnonymousDeclarationNumbers.find(D);
assert(It != AnonymousDeclarationNumbers.end() &&
"declaration not found within its lexical context");
}
return It->second;
}
void ASTRecordWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc,
DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
AddTypeSourceInfo(DNLoc.NamedType.TInfo);
break;
case DeclarationName::CXXOperatorName:
AddSourceLocation(SourceLocation::getFromRawEncoding(
DNLoc.CXXOperatorName.BeginOpNameLoc));
AddSourceLocation(
SourceLocation::getFromRawEncoding(DNLoc.CXXOperatorName.EndOpNameLoc));
break;
case DeclarationName::CXXLiteralOperatorName:
AddSourceLocation(SourceLocation::getFromRawEncoding(
DNLoc.CXXLiteralOperatorName.OpNameLoc));
break;
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXUsingDirective:
case DeclarationName::CXXDeductionGuideName:
break;
}
}
void ASTRecordWriter::AddDeclarationNameInfo(
const DeclarationNameInfo &NameInfo) {
AddDeclarationName(NameInfo.getName());
AddSourceLocation(NameInfo.getLoc());
AddDeclarationNameLoc(NameInfo.getInfo(), NameInfo.getName());
}
void ASTRecordWriter::AddQualifierInfo(const QualifierInfo &Info) {
AddNestedNameSpecifierLoc(Info.QualifierLoc);
Record->push_back(Info.NumTemplParamLists);
for (unsigned i = 0, e = Info.NumTemplParamLists; i != e; ++i)
AddTemplateParameterList(Info.TemplParamLists[i]);
}
void ASTRecordWriter::AddNestedNameSpecifier(NestedNameSpecifier *NNS) {
// Nested name specifiers usually aren't too long; an inline capacity of 8
// typically accommodates the vast majority.
SmallVector<NestedNameSpecifier *, 8> NestedNames;
// Push each of the NNS's onto a stack for serialization in reverse order.
while (NNS) {
NestedNames.push_back(NNS);
NNS = NNS->getPrefix();
}
Record->push_back(NestedNames.size());
while(!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier::SpecifierKind Kind = NNS->getKind();
Record->push_back(Kind);
switch (Kind) {
case NestedNameSpecifier::Identifier:
AddIdentifierRef(NNS->getAsIdentifier());
break;
case NestedNameSpecifier::Namespace:
AddDeclRef(NNS->getAsNamespace());
break;
case NestedNameSpecifier::NamespaceAlias:
AddDeclRef(NNS->getAsNamespaceAlias());
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
AddTypeRef(QualType(NNS->getAsType(), 0));
Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
break;
case NestedNameSpecifier::Global:
// Don't need to write an associated value.
break;
case NestedNameSpecifier::Super:
AddDeclRef(NNS->getAsRecordDecl());
break;
}
}
}
void ASTRecordWriter::AddNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
// Nested name specifiers usually aren't too long; an inline capacity of 8
// typically accommodates the vast majority.
SmallVector<NestedNameSpecifierLoc , 8> NestedNames;
// Push each of the nested-name-specifiers onto a stack for
// serialization in reverse order.
while (NNS) {
NestedNames.push_back(NNS);
NNS = NNS.getPrefix();
}
Record->push_back(NestedNames.size());
while(!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier::SpecifierKind Kind
= NNS.getNestedNameSpecifier()->getKind();
Record->push_back(Kind);
switch (Kind) {
case NestedNameSpecifier::Identifier:
AddIdentifierRef(NNS.getNestedNameSpecifier()->getAsIdentifier());
AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::Namespace:
AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespace());
AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::NamespaceAlias:
AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespaceAlias());
AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
AddTypeLoc(NNS.getTypeLoc());
AddSourceLocation(NNS.getLocalSourceRange().getEnd());
break;
case NestedNameSpecifier::Global:
AddSourceLocation(NNS.getLocalSourceRange().getEnd());
break;
case NestedNameSpecifier::Super:
AddDeclRef(NNS.getNestedNameSpecifier()->getAsRecordDecl());
AddSourceRange(NNS.getLocalSourceRange());
break;
}
}
}
void ASTRecordWriter::AddTemplateName(TemplateName Name) {
TemplateName::NameKind Kind = Name.getKind();
Record->push_back(Kind);
switch (Kind) {
case TemplateName::Template:
AddDeclRef(Name.getAsTemplateDecl());
break;
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *OvT = Name.getAsOverloadedTemplate();
Record->push_back(OvT->size());
for (const auto &I : *OvT)
AddDeclRef(I);
break;
}
case TemplateName::QualifiedTemplate: {
QualifiedTemplateName *QualT = Name.getAsQualifiedTemplateName();
AddNestedNameSpecifier(QualT->getQualifier());
Record->push_back(QualT->hasTemplateKeyword());
AddDeclRef(QualT->getTemplateDecl());
break;
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DepT = Name.getAsDependentTemplateName();
AddNestedNameSpecifier(DepT->getQualifier());
Record->push_back(DepT->isIdentifier());
if (DepT->isIdentifier())
AddIdentifierRef(DepT->getIdentifier());
else
Record->push_back(DepT->getOperator());
break;
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
AddDeclRef(subst->getParameter());
AddTemplateName(subst->getReplacement());
break;
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *SubstPack
= Name.getAsSubstTemplateTemplateParmPack();
AddDeclRef(SubstPack->getParameterPack());
AddTemplateArgument(SubstPack->getArgumentPack());
break;
}
}
}
void ASTRecordWriter::AddTemplateArgument(const TemplateArgument &Arg) {
Record->push_back(Arg.getKind());
switch (Arg.getKind()) {
case TemplateArgument::Null:
break;
case TemplateArgument::Type:
AddTypeRef(Arg.getAsType());
break;
case TemplateArgument::Declaration:
AddDeclRef(Arg.getAsDecl());
AddTypeRef(Arg.getParamTypeForDecl());
break;
case TemplateArgument::NullPtr:
AddTypeRef(Arg.getNullPtrType());
break;
case TemplateArgument::Integral:
AddAPSInt(Arg.getAsIntegral());
AddTypeRef(Arg.getIntegralType());
break;
case TemplateArgument::Template:
AddTemplateName(Arg.getAsTemplateOrTemplatePattern());
break;
case TemplateArgument::TemplateExpansion:
AddTemplateName(Arg.getAsTemplateOrTemplatePattern());
if (Optional<unsigned> NumExpansions = Arg.getNumTemplateExpansions())
Record->push_back(*NumExpansions + 1);
else
Record->push_back(0);
break;
case TemplateArgument::Expression:
AddStmt(Arg.getAsExpr());
break;
case TemplateArgument::Pack:
Record->push_back(Arg.pack_size());
for (const auto &P : Arg.pack_elements())
AddTemplateArgument(P);
break;
}
}
void ASTRecordWriter::AddTemplateParameterList(
const TemplateParameterList *TemplateParams) {
assert(TemplateParams && "No TemplateParams!");
AddSourceLocation(TemplateParams->getTemplateLoc());
AddSourceLocation(TemplateParams->getLAngleLoc());
AddSourceLocation(TemplateParams->getRAngleLoc());
// TODO: Concepts
Record->push_back(TemplateParams->size());
for (const auto &P : *TemplateParams)
AddDeclRef(P);
}
/// \brief Emit a template argument list.
void ASTRecordWriter::AddTemplateArgumentList(
const TemplateArgumentList *TemplateArgs) {
assert(TemplateArgs && "No TemplateArgs!");
Record->push_back(TemplateArgs->size());
for (int i = 0, e = TemplateArgs->size(); i != e; ++i)
AddTemplateArgument(TemplateArgs->get(i));
}
void ASTRecordWriter::AddASTTemplateArgumentListInfo(
const ASTTemplateArgumentListInfo *ASTTemplArgList) {
assert(ASTTemplArgList && "No ASTTemplArgList!");
AddSourceLocation(ASTTemplArgList->LAngleLoc);
AddSourceLocation(ASTTemplArgList->RAngleLoc);
Record->push_back(ASTTemplArgList->NumTemplateArgs);
const TemplateArgumentLoc *TemplArgs = ASTTemplArgList->getTemplateArgs();
for (int i = 0, e = ASTTemplArgList->NumTemplateArgs; i != e; ++i)
AddTemplateArgumentLoc(TemplArgs[i]);
}
void ASTRecordWriter::AddUnresolvedSet(const ASTUnresolvedSet &Set) {
Record->push_back(Set.size());
for (ASTUnresolvedSet::const_iterator
I = Set.begin(), E = Set.end(); I != E; ++I) {
AddDeclRef(I.getDecl());
Record->push_back(I.getAccess());
}
}
// FIXME: Move this out of the main ASTRecordWriter interface.
void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) {
Record->push_back(Base.isVirtual());
Record->push_back(Base.isBaseOfClass());
Record->push_back(Base.getAccessSpecifierAsWritten());
Record->push_back(Base.getInheritConstructors());
AddTypeSourceInfo(Base.getTypeSourceInfo());
AddSourceRange(Base.getSourceRange());
AddSourceLocation(Base.isPackExpansion()? Base.getEllipsisLoc()
: SourceLocation());
}
static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W,
ArrayRef<CXXBaseSpecifier> Bases) {
ASTWriter::RecordData Record;
ASTRecordWriter Writer(W, Record);
Writer.push_back(Bases.size());
for (auto &Base : Bases)
Writer.AddCXXBaseSpecifier(Base);
return Writer.Emit(serialization::DECL_CXX_BASE_SPECIFIERS);
}
// FIXME: Move this out of the main ASTRecordWriter interface.
void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef<CXXBaseSpecifier> Bases) {
AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases));
}
static uint64_t
EmitCXXCtorInitializers(ASTWriter &W,
ArrayRef<CXXCtorInitializer *> CtorInits) {
ASTWriter::RecordData Record;
ASTRecordWriter Writer(W, Record);
Writer.push_back(CtorInits.size());
for (auto *Init : CtorInits) {
if (Init->isBaseInitializer()) {
Writer.push_back(CTOR_INITIALIZER_BASE);
Writer.AddTypeSourceInfo(Init->getTypeSourceInfo());
Writer.push_back(Init->isBaseVirtual());
} else if (Init->isDelegatingInitializer()) {
Writer.push_back(CTOR_INITIALIZER_DELEGATING);
Writer.AddTypeSourceInfo(Init->getTypeSourceInfo());
} else if (Init->isMemberInitializer()){
Writer.push_back(CTOR_INITIALIZER_MEMBER);
Writer.AddDeclRef(Init->getMember());
} else {
Writer.push_back(CTOR_INITIALIZER_INDIRECT_MEMBER);
Writer.AddDeclRef(Init->getIndirectMember());
}
Writer.AddSourceLocation(Init->getMemberLocation());
Writer.AddStmt(Init->getInit());
Writer.AddSourceLocation(Init->getLParenLoc());
Writer.AddSourceLocation(Init->getRParenLoc());
Writer.push_back(Init->isWritten());
if (Init->isWritten())
Writer.push_back(Init->getSourceOrder());
}
return Writer.Emit(serialization::DECL_CXX_CTOR_INITIALIZERS);
}
// FIXME: Move this out of the main ASTRecordWriter interface.
void ASTRecordWriter::AddCXXCtorInitializers(
ArrayRef<CXXCtorInitializer *> CtorInits) {
AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits));
}
void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
auto &Data = D->data();
Record->push_back(Data.IsLambda);
Record->push_back(Data.UserDeclaredConstructor);
Record->push_back(Data.UserDeclaredSpecialMembers);
Record->push_back(Data.Aggregate);
Record->push_back(Data.PlainOldData);
Record->push_back(Data.Empty);
Record->push_back(Data.Polymorphic);
Record->push_back(Data.Abstract);
Record->push_back(Data.IsStandardLayout);
Record->push_back(Data.HasNoNonEmptyBases);
Record->push_back(Data.HasPrivateFields);
Record->push_back(Data.HasProtectedFields);
Record->push_back(Data.HasPublicFields);
Record->push_back(Data.HasMutableFields);
Record->push_back(Data.HasVariantMembers);
Record->push_back(Data.HasOnlyCMembers);
Record->push_back(Data.HasInClassInitializer);
Record->push_back(Data.HasUninitializedReferenceMember);
Record->push_back(Data.HasUninitializedFields);
Record->push_back(Data.HasInheritedConstructor);
Record->push_back(Data.HasInheritedAssignment);
+ Record->push_back(Data.NeedOverloadResolutionForCopyConstructor);
Record->push_back(Data.NeedOverloadResolutionForMoveConstructor);
Record->push_back(Data.NeedOverloadResolutionForMoveAssignment);
Record->push_back(Data.NeedOverloadResolutionForDestructor);
+ Record->push_back(Data.DefaultedCopyConstructorIsDeleted);
Record->push_back(Data.DefaultedMoveConstructorIsDeleted);
Record->push_back(Data.DefaultedMoveAssignmentIsDeleted);
Record->push_back(Data.DefaultedDestructorIsDeleted);
Record->push_back(Data.HasTrivialSpecialMembers);
Record->push_back(Data.DeclaredNonTrivialSpecialMembers);
Record->push_back(Data.HasIrrelevantDestructor);
Record->push_back(Data.HasConstexprNonCopyMoveConstructor);
Record->push_back(Data.HasDefaultedDefaultConstructor);
+ Record->push_back(Data.CanPassInRegisters);
Record->push_back(Data.DefaultedDefaultConstructorIsConstexpr);
Record->push_back(Data.HasConstexprDefaultConstructor);
Record->push_back(Data.HasNonLiteralTypeFieldsOrBases);
Record->push_back(Data.ComputedVisibleConversions);
Record->push_back(Data.UserProvidedDefaultConstructor);
Record->push_back(Data.DeclaredSpecialMembers);
Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForVBase);
Record->push_back(Data.ImplicitCopyConstructorCanHaveConstParamForNonVBase);
Record->push_back(Data.ImplicitCopyAssignmentHasConstParam);
Record->push_back(Data.HasDeclaredCopyConstructorWithConstParam);
Record->push_back(Data.HasDeclaredCopyAssignmentWithConstParam);
// getODRHash will compute the ODRHash if it has not been previously computed.
Record->push_back(D->getODRHash());
bool ModulesDebugInfo = Writer->Context->getLangOpts().ModulesDebugInfo &&
Writer->WritingModule && !D->isDependentType();
Record->push_back(ModulesDebugInfo);
if (ModulesDebugInfo)
Writer->ModularCodegenDecls.push_back(Writer->GetDeclRef(D));
// IsLambda bit is already saved.
Record->push_back(Data.NumBases);
if (Data.NumBases > 0)
AddCXXBaseSpecifiers(Data.bases());
// FIXME: Make VBases lazily computed when needed to avoid storing them.
Record->push_back(Data.NumVBases);
if (Data.NumVBases > 0)
AddCXXBaseSpecifiers(Data.vbases());
AddUnresolvedSet(Data.Conversions.get(*Writer->Context));
AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context));
// Data.Definition is the owning decl, no need to write it.
AddDeclRef(D->getFirstFriend());
// Add lambda-specific data.
if (Data.IsLambda) {
auto &Lambda = D->getLambdaData();
Record->push_back(Lambda.Dependent);
Record->push_back(Lambda.IsGenericLambda);
Record->push_back(Lambda.CaptureDefault);
Record->push_back(Lambda.NumCaptures);
Record->push_back(Lambda.NumExplicitCaptures);
Record->push_back(Lambda.ManglingNumber);
AddDeclRef(D->getLambdaContextDecl());
AddTypeSourceInfo(Lambda.MethodTyInfo);
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
const LambdaCapture &Capture = Lambda.Captures[I];
AddSourceLocation(Capture.getLocation());
Record->push_back(Capture.isImplicit());
Record->push_back(Capture.getCaptureKind());
switch (Capture.getCaptureKind()) {
case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
break;
case LCK_ByCopy:
case LCK_ByRef:
VarDecl *Var =
Capture.capturesVariable() ? Capture.getCapturedVar() : nullptr;
AddDeclRef(Var);
AddSourceLocation(Capture.isPackExpansion() ? Capture.getEllipsisLoc()
: SourceLocation());
break;
}
}
}
}
void ASTWriter::ReaderInitialized(ASTReader *Reader) {
assert(Reader && "Cannot remove chain");
assert((!Chain || Chain == Reader) && "Cannot replace chain");
assert(FirstDeclID == NextDeclID &&
FirstTypeID == NextTypeID &&
FirstIdentID == NextIdentID &&
FirstMacroID == NextMacroID &&
FirstSubmoduleID == NextSubmoduleID &&
FirstSelectorID == NextSelectorID &&
"Setting chain after writing has started.");
Chain = Reader;
// Note, this will get called multiple times, once when the reader starts up
// and again each time it's done reading a PCH or module.
FirstDeclID = NUM_PREDEF_DECL_IDS + Chain->getTotalNumDecls();
FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes();
FirstIdentID = NUM_PREDEF_IDENT_IDS + Chain->getTotalNumIdentifiers();
FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros();
FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules();
FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors();
NextDeclID = FirstDeclID;
NextTypeID = FirstTypeID;
NextIdentID = FirstIdentID;
NextMacroID = FirstMacroID;
NextSelectorID = FirstSelectorID;
NextSubmoduleID = FirstSubmoduleID;
}
void ASTWriter::IdentifierRead(IdentID ID, IdentifierInfo *II) {
// Always keep the highest ID. See \p TypeRead() for more information.
IdentID &StoredID = IdentifierIDs[II];
if (ID > StoredID)
StoredID = ID;
}
void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) {
// Always keep the highest ID. See \p TypeRead() for more information.
MacroID &StoredID = MacroIDs[MI];
if (ID > StoredID)
StoredID = ID;
}
void ASTWriter::TypeRead(TypeIdx Idx, QualType T) {
// Always take the highest-numbered type index. This copes with an interesting
// case for chained AST writing where we schedule writing the type and then,
// later, deserialize the type from another AST. In this case, we want to
// keep the higher-numbered entry so that we can properly write it out to
// the AST file.
TypeIdx &StoredIdx = TypeIdxs[T];
if (Idx.getIndex() >= StoredIdx.getIndex())
StoredIdx = Idx;
}
void ASTWriter::SelectorRead(SelectorID ID, Selector S) {
// Always keep the highest ID. See \p TypeRead() for more information.
SelectorID &StoredID = SelectorIDs[S];
if (ID > StoredID)
StoredID = ID;
}
void ASTWriter::MacroDefinitionRead(serialization::PreprocessedEntityID ID,
MacroDefinitionRecord *MD) {
assert(MacroDefinitions.find(MD) == MacroDefinitions.end());
MacroDefinitions[MD] = ID;
}
void ASTWriter::ModuleRead(serialization::SubmoduleID ID, Module *Mod) {
assert(SubmoduleIDs.find(Mod) == SubmoduleIDs.end());
SubmoduleIDs[Mod] = ID;
}
void ASTWriter::CompletedTagDefinition(const TagDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(D->isCompleteDefinition());
assert(!WritingAST && "Already writing the AST!");
if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
// We are interested when a PCH decl is modified.
if (RD->isFromASTFile()) {
// A forward reference was mutated into a definition. Rewrite it.
// FIXME: This happens during template instantiation; should we
// have created a new definition decl instead?
assert(isTemplateInstantiation(RD->getTemplateSpecializationKind()) &&
"completed a tag from another module but not by instantiation?");
DeclUpdates[RD].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_CLASS_DEFINITION));
}
}
}
static bool isImportedDeclContext(ASTReader *Chain, const Decl *D) {
if (D->isFromASTFile())
return true;
// The predefined __va_list_tag struct is imported if we imported any decls.
// FIXME: This is a gross hack.
return D == D->getASTContext().getVaListTagDecl();
}
void ASTWriter::AddedVisibleDecl(const DeclContext *DC, const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(DC->isLookupContext() &&
"Should not add lookup results to non-lookup contexts!");
// TU is handled elsewhere.
if (isa<TranslationUnitDecl>(DC))
return;
// Namespaces are handled elsewhere, except for template instantiations of
// FunctionTemplateDecls in namespaces. We are interested in cases where the
// local instantiations are added to an imported context. This only happens
// when adding ADL lookup candidates, for example templated friends.
if (isa<NamespaceDecl>(DC) && D->getFriendObjectKind() == Decl::FOK_None &&
!isa<FunctionTemplateDecl>(D))
return;
// We're only interested in cases where a local declaration is added to an
// imported context.
if (D->isFromASTFile() || !isImportedDeclContext(Chain, cast<Decl>(DC)))
return;
assert(DC == DC->getPrimaryContext() && "added to non-primary context");
assert(!getDefinitiveDeclContext(DC) && "DeclContext not definitive!");
assert(!WritingAST && "Already writing the AST!");
if (UpdatedDeclContexts.insert(DC) && !cast<Decl>(DC)->isFromASTFile()) {
// We're adding a visible declaration to a predefined decl context. Ensure
// that we write out all of its lookup results so we don't get a nasty
// surprise when we try to emit its lookup table.
for (auto *Child : DC->decls())
DeclsToEmitEvenIfUnreferenced.push_back(Child);
}
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
void ASTWriter::AddedCXXImplicitMember(const CXXRecordDecl *RD, const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(D->isImplicit());
// We're only interested in cases where a local declaration is added to an
// imported context.
if (D->isFromASTFile() || !isImportedDeclContext(Chain, RD))
return;
if (!isa<CXXMethodDecl>(D))
return;
// A decl coming from PCH was modified.
assert(RD->isCompleteDefinition());
assert(!WritingAST && "Already writing the AST!");
DeclUpdates[RD].push_back(DeclUpdate(UPD_CXX_ADDED_IMPLICIT_MEMBER, D));
}
void ASTWriter::ResolvedExceptionSpec(const FunctionDecl *FD) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!DoneWritingDeclsAndTypes && "Already done writing updates!");
if (!Chain) return;
Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) {
// If we don't already know the exception specification for this redecl
// chain, add an update record for it.
if (isUnresolvedExceptionSpec(cast<FunctionDecl>(D)
->getType()
->castAs<FunctionProtoType>()
->getExceptionSpecType()))
DeclUpdates[D].push_back(UPD_CXX_RESOLVED_EXCEPTION_SPEC);
});
}
void ASTWriter::DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!Chain) return;
Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) {
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_DEDUCED_RETURN_TYPE, ReturnType));
});
}
void ASTWriter::ResolvedOperatorDelete(const CXXDestructorDecl *DD,
const FunctionDecl *Delete) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
assert(Delete && "Not given an operator delete");
if (!Chain) return;
Chain->forEachImportedKeyDecl(DD, [&](const Decl *D) {
DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_RESOLVED_DTOR_DELETE, Delete));
});
}
void ASTWriter::CompletedImplicitDefinition(const FunctionDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return; // Declaration not imported from PCH.
// Implicit function decl from a PCH was defined.
DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION));
}
void ASTWriter::FunctionDefinitionInstantiated(const FunctionDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(DeclUpdate(UPD_CXX_ADDED_FUNCTION_DEFINITION));
}
void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
// Since the actual instantiation is delayed, this really means that we need
// to update the instantiation location.
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER,
D->getMemberSpecializationInfo()->getPointOfInstantiation()));
}
void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT, D));
}
void ASTWriter::DefaultMemberInitializerInstantiated(const FieldDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(
DeclUpdate(UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER, D));
}
void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
const ObjCInterfaceDecl *IFD) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!IFD->isFromASTFile())
return; // Declaration not imported from PCH.
assert(IFD->getDefinition() && "Category on a class without a definition?");
ObjCClassesWithCategories.insert(
const_cast<ObjCInterfaceDecl *>(IFD->getDefinition()));
}
void ASTWriter::DeclarationMarkedUsed(const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
// If there is *any* declaration of the entity that's not from an AST file,
// we can skip writing the update record. We make sure that isUsed() triggers
// completion of the redeclaration chain of the entity.
for (auto Prev = D->getMostRecentDecl(); Prev; Prev = Prev->getPreviousDecl())
if (IsLocalDecl(Prev))
return;
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_USED));
}
void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_THREADPRIVATE));
}
void ASTWriter::DeclarationMarkedOpenMPDeclareTarget(const Decl *D,
const Attr *Attr) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
DeclUpdates[D].push_back(
DeclUpdate(UPD_DECL_MARKED_OPENMP_DECLARETARGET, Attr));
}
void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
assert(D->isHidden() && "expected a hidden declaration");
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M));
}
void ASTWriter::AddedAttributeToRecord(const Attr *Attr,
const RecordDecl *Record) {
if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!Record->isFromASTFile())
return;
DeclUpdates[Record].push_back(DeclUpdate(UPD_ADDED_ATTR_TO_RECORD, Attr));
}
void ASTWriter::AddedCXXTemplateSpecialization(
const ClassTemplateDecl *TD, const ClassTemplateSpecializationDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!TD->getFirstDecl()->isFromASTFile())
return;
if (Chain && Chain->isProcessingUpdateRecords())
return;
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
void ASTWriter::AddedCXXTemplateSpecialization(
const VarTemplateDecl *TD, const VarTemplateSpecializationDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!TD->getFirstDecl()->isFromASTFile())
return;
if (Chain && Chain->isProcessingUpdateRecords())
return;
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
void ASTWriter::AddedCXXTemplateSpecialization(const FunctionTemplateDecl *TD,
const FunctionDecl *D) {
assert(!WritingAST && "Already writing the AST!");
if (!TD->getFirstDecl()->isFromASTFile())
return;
if (Chain && Chain->isProcessingUpdateRecords())
return;
DeclsToEmitEvenIfUnreferenced.push_back(D);
}
Index: head/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- head/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp (revision 322854)
+++ head/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp (revision 322855)
@@ -1,2482 +1,2495 @@
//== RegionStore.cpp - Field-sensitive store model --------------*- C++ -*--==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a basic region store model. In this model we do have
// field sensitivity, but we assume nothing about the heap shape, so
// recursive data structures are largely ignored. Basically we do 1-limiting
// analysis. Parameter pointers are assumed not to alias; pointee objects of
// parameters are created lazily.
//
//===----------------------------------------------------------------------===//
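// An illustrative sketch (editorial, not from the original source) of what
// field sensitivity with 1-limiting means in practice:
//
//   struct Node { int val; struct Node *next; };
//   void touch(struct Node *n) {
//     n->val = 1;       // tracked: a distinct binding for n's 'val' field
//     n->next->val = 2; // the pointees of 'n' and 'n->next' are created
//   }                   // lazily; deeper recursion is not modeled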
#include "clang/AST/Attr.h"
#include "clang/AST/CharUnits.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/Analysis/AnalysisContext.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>
using namespace clang;
using namespace ento;
//===----------------------------------------------------------------------===//
// Representation of binding keys.
//===----------------------------------------------------------------------===//
namespace {
class BindingKey {
public:
enum Kind { Default = 0x0, Direct = 0x1 };
private:
enum { Symbolic = 0x2 };
llvm::PointerIntPair<const MemRegion *, 2> P;
uint64_t Data;
/// Create a key for a binding to region \p r, which has a symbolic offset
/// from region \p Base.
explicit BindingKey(const SubRegion *r, const SubRegion *Base, Kind k)
: P(r, k | Symbolic), Data(reinterpret_cast<uintptr_t>(Base)) {
assert(r && Base && "Must have known regions.");
assert(getConcreteOffsetRegion() == Base && "Failed to store base region");
}
/// Create a key for a binding at \p offset from base region \p r.
explicit BindingKey(const MemRegion *r, uint64_t offset, Kind k)
: P(r, k), Data(offset) {
assert(r && "Must have known regions.");
assert(getOffset() == offset && "Failed to store offset");
assert((r == r->getBaseRegion() || isa<ObjCIvarRegion>(r)) && "Not a base");
}
public:
bool isDirect() const { return P.getInt() & Direct; }
bool hasSymbolicOffset() const { return P.getInt() & Symbolic; }
const MemRegion *getRegion() const { return P.getPointer(); }
uint64_t getOffset() const {
assert(!hasSymbolicOffset());
return Data;
}
const SubRegion *getConcreteOffsetRegion() const {
assert(hasSymbolicOffset());
return reinterpret_cast<const SubRegion *>(static_cast<uintptr_t>(Data));
}
const MemRegion *getBaseRegion() const {
if (hasSymbolicOffset())
return getConcreteOffsetRegion()->getBaseRegion();
return getRegion()->getBaseRegion();
}
void Profile(llvm::FoldingSetNodeID& ID) const {
ID.AddPointer(P.getOpaqueValue());
ID.AddInteger(Data);
}
static BindingKey Make(const MemRegion *R, Kind k);
bool operator<(const BindingKey &X) const {
if (P.getOpaqueValue() < X.P.getOpaqueValue())
return true;
if (P.getOpaqueValue() > X.P.getOpaqueValue())
return false;
return Data < X.Data;
}
bool operator==(const BindingKey &X) const {
return P.getOpaqueValue() == X.P.getOpaqueValue() &&
Data == X.Data;
}
void dump() const;
};
} // end anonymous namespace
BindingKey BindingKey::Make(const MemRegion *R, Kind k) {
const RegionOffset &RO = R->getAsOffset();
if (RO.hasSymbolicOffset())
return BindingKey(cast<SubRegion>(R), cast<SubRegion>(RO.getRegion()), k);
return BindingKey(RO.getRegion(), RO.getOffset(), k);
}
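// A hedged illustration of the two key shapes produced above (informal
// notation; a 32-bit 'int' is assumed):
//
//   struct S { int a; int b; } s;
//   s.b = 0;       // concrete key: (base region of 's', bit offset 32, Direct)
//
//   S *p; int i;   // 'i' is symbolic
//   p[i].b = 0;    // symbolic key: keeps the region 'p[i].b' itself plus the
//                  // nearest concrete-offset region as its base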
namespace llvm {
static inline
raw_ostream &operator<<(raw_ostream &os, BindingKey K) {
os << '(' << K.getRegion();
if (!K.hasSymbolicOffset())
os << ',' << K.getOffset();
os << ',' << (K.isDirect() ? "direct" : "default")
<< ')';
return os;
}
template <typename T> struct isPodLike;
template <> struct isPodLike<BindingKey> {
static const bool value = true;
};
} // end llvm namespace
LLVM_DUMP_METHOD void BindingKey::dump() const { llvm::errs() << *this; }
//===----------------------------------------------------------------------===//
// Actual Store type.
//===----------------------------------------------------------------------===//
typedef llvm::ImmutableMap<BindingKey, SVal> ClusterBindings;
typedef llvm::ImmutableMapRef<BindingKey, SVal> ClusterBindingsRef;
typedef std::pair<BindingKey, SVal> BindingPair;
typedef llvm::ImmutableMap<const MemRegion *, ClusterBindings>
RegionBindings;
namespace {
class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *,
ClusterBindings> {
ClusterBindings::Factory *CBFactory;
public:
typedef llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>
ParentTy;
RegionBindingsRef(ClusterBindings::Factory &CBFactory,
const RegionBindings::TreeTy *T,
RegionBindings::TreeTy::Factory *F)
: llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(T, F),
CBFactory(&CBFactory) {}
RegionBindingsRef(const ParentTy &P, ClusterBindings::Factory &CBFactory)
: llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(P),
CBFactory(&CBFactory) {}
RegionBindingsRef add(key_type_ref K, data_type_ref D) const {
return RegionBindingsRef(static_cast<const ParentTy *>(this)->add(K, D),
*CBFactory);
}
RegionBindingsRef remove(key_type_ref K) const {
return RegionBindingsRef(static_cast<const ParentTy *>(this)->remove(K),
*CBFactory);
}
RegionBindingsRef addBinding(BindingKey K, SVal V) const;
RegionBindingsRef addBinding(const MemRegion *R,
BindingKey::Kind k, SVal V) const;
const SVal *lookup(BindingKey K) const;
const SVal *lookup(const MemRegion *R, BindingKey::Kind k) const;
using llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>::lookup;
RegionBindingsRef removeBinding(BindingKey K);
RegionBindingsRef removeBinding(const MemRegion *R,
BindingKey::Kind k);
RegionBindingsRef removeBinding(const MemRegion *R) {
return removeBinding(R, BindingKey::Direct).
removeBinding(R, BindingKey::Default);
}
Optional<SVal> getDirectBinding(const MemRegion *R) const;
/// getDefaultBinding - Returns an SVal* representing an optional default
/// binding associated with a region and its subregions.
Optional<SVal> getDefaultBinding(const MemRegion *R) const;
/// Return the internal tree as a Store.
Store asStore() const {
return asImmutableMap().getRootWithoutRetain();
}
void dump(raw_ostream &OS, const char *nl) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
const ClusterBindings &Cluster = I.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
OS << ' ' << CI.getKey() << " : " << CI.getData() << nl;
}
OS << nl;
}
}
LLVM_DUMP_METHOD void dump() const { dump(llvm::errs(), "\n"); }
};
} // end anonymous namespace
typedef const RegionBindingsRef& RegionBindingsConstRef;
Optional<SVal> RegionBindingsRef::getDirectBinding(const MemRegion *R) const {
return Optional<SVal>::create(lookup(R, BindingKey::Direct));
}
Optional<SVal> RegionBindingsRef::getDefaultBinding(const MemRegion *R) const {
if (R->isBoundable())
if (const TypedValueRegion *TR = dyn_cast<TypedValueRegion>(R))
if (TR->getValueType()->isUnionType())
return UnknownVal();
return Optional<SVal>::create(lookup(R, BindingKey::Default));
}
RegionBindingsRef RegionBindingsRef::addBinding(BindingKey K, SVal V) const {
const MemRegion *Base = K.getBaseRegion();
const ClusterBindings *ExistingCluster = lookup(Base);
ClusterBindings Cluster =
(ExistingCluster ? *ExistingCluster : CBFactory->getEmptyMap());
ClusterBindings NewCluster = CBFactory->add(Cluster, K, V);
return add(Base, NewCluster);
}
RegionBindingsRef RegionBindingsRef::addBinding(const MemRegion *R,
BindingKey::Kind k,
SVal V) const {
return addBinding(BindingKey::Make(R, k), V);
}
const SVal *RegionBindingsRef::lookup(BindingKey K) const {
const ClusterBindings *Cluster = lookup(K.getBaseRegion());
if (!Cluster)
return nullptr;
return Cluster->lookup(K);
}
const SVal *RegionBindingsRef::lookup(const MemRegion *R,
BindingKey::Kind k) const {
return lookup(BindingKey::Make(R, k));
}
RegionBindingsRef RegionBindingsRef::removeBinding(BindingKey K) {
const MemRegion *Base = K.getBaseRegion();
const ClusterBindings *Cluster = lookup(Base);
if (!Cluster)
return *this;
ClusterBindings NewCluster = CBFactory->remove(*Cluster, K);
if (NewCluster.isEmpty())
return remove(Base);
return add(Base, NewCluster);
}
RegionBindingsRef RegionBindingsRef::removeBinding(const MemRegion *R,
BindingKey::Kind k){
return removeBinding(BindingKey::Make(R, k));
}
//===----------------------------------------------------------------------===//
// Fine-grained control of RegionStoreManager.
//===----------------------------------------------------------------------===//
namespace {
struct minimal_features_tag {};
struct maximal_features_tag {};
class RegionStoreFeatures {
bool SupportsFields;
public:
RegionStoreFeatures(minimal_features_tag) :
SupportsFields(false) {}
RegionStoreFeatures(maximal_features_tag) :
SupportsFields(true) {}
void enableFields(bool t) { SupportsFields = t; }
bool supportsFields() const { return SupportsFields; }
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Main RegionStore logic.
//===----------------------------------------------------------------------===//
namespace {
class invalidateRegionsWorker;
class RegionStoreManager : public StoreManager {
public:
const RegionStoreFeatures Features;
RegionBindings::Factory RBFactory;
mutable ClusterBindings::Factory CBFactory;
typedef std::vector<SVal> SValListTy;
private:
typedef llvm::DenseMap<const LazyCompoundValData *,
SValListTy> LazyBindingsMapTy;
LazyBindingsMapTy LazyBindingsMap;
/// The largest number of fields a struct can have and still be
/// considered "small".
///
/// This is currently used to decide whether or not it is worth "forcing" a
/// LazyCompoundVal on bind.
///
/// This is controlled by the 'region-store-small-struct-limit' option.
/// To disable all small-struct-dependent behavior, set the option to "0".
unsigned SmallStructLimit;
/// \brief A helper used to populate the work list with the given set of
/// regions.
void populateWorkList(invalidateRegionsWorker &W,
ArrayRef<SVal> Values,
InvalidatedRegions *TopLevelRegions);
public:
RegionStoreManager(ProgramStateManager& mgr, const RegionStoreFeatures &f)
: StoreManager(mgr), Features(f),
RBFactory(mgr.getAllocator()), CBFactory(mgr.getAllocator()),
SmallStructLimit(0) {
if (SubEngine *Eng = StateMgr.getOwningEngine()) {
AnalyzerOptions &Options = Eng->getAnalysisManager().options;
SmallStructLimit =
Options.getOptionAsInteger("region-store-small-struct-limit", 2);
}
}
/// setImplicitDefaultValue - Set the default binding for the provided
/// MemRegion to the value implicitly defined for compound literals when
/// the value is not specified.
RegionBindingsRef setImplicitDefaultValue(RegionBindingsConstRef B,
const MemRegion *R, QualType T);
/// ArrayToPointer - Emulates the "decay" of an array to a pointer
/// type. 'Array' represents the lvalue of the array being decayed
/// to a pointer, and the returned SVal represents the decayed
/// version of that lvalue (i.e., a pointer to the first element of
/// the array). This is called by ExprEngine when evaluating
/// casts from arrays to pointers.
SVal ArrayToPointer(Loc Array, QualType ElementTy) override;
StoreRef getInitialStore(const LocationContext *InitLoc) override {
return StoreRef(RBFactory.getEmptyMap().getRootWithoutRetain(), *this);
}
//===-------------------------------------------------------------------===//
// Binding values to regions.
//===-------------------------------------------------------------------===//
RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K,
const Expr *Ex,
unsigned Count,
const LocationContext *LCtx,
RegionBindingsRef B,
InvalidatedRegions *Invalidated);
StoreRef invalidateRegions(Store store,
ArrayRef<SVal> Values,
const Expr *E, unsigned Count,
const LocationContext *LCtx,
const CallEvent *Call,
InvalidatedSymbols &IS,
RegionAndSymbolInvalidationTraits &ITraits,
InvalidatedRegions *Invalidated,
InvalidatedRegions *InvalidatedTopLevel) override;
bool scanReachableSymbols(Store S, const MemRegion *R,
ScanReachableSymbols &Callbacks) override;
RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B,
const SubRegion *R);
public: // Part of public interface to class.
StoreRef Bind(Store store, Loc LV, SVal V) override {
return StoreRef(bind(getRegionBindings(store), LV, V).asStore(), *this);
}
RegionBindingsRef bind(RegionBindingsConstRef B, Loc LV, SVal V);
// BindDefault is only used to initialize a region with a default value.
StoreRef BindDefault(Store store, const MemRegion *R, SVal V) override {
+ // FIXME: The offsets of empty bases can be tricky because of
+ // the so-called "empty base class optimization".
+ // If a base class has been optimized out
+ // we should not try to create a binding, otherwise we should.
+ // Unfortunately, at the moment ASTRecordLayout doesn't expose
+ // the actual sizes of the empty bases
+ // and trying to infer them from offsets/alignments
+ // seems to be error-prone and non-trivial because of the trailing padding.
+ // As a temporary mitigation we don't create bindings for empty bases.
+ if (R->getKind() == MemRegion::CXXBaseObjectRegionKind &&
+ cast<CXXBaseObjectRegion>(R)->getDecl()->isEmpty())
+ return StoreRef(store, *this);
+
RegionBindingsRef B = getRegionBindings(store);
assert(!B.lookup(R, BindingKey::Direct));
BindingKey Key = BindingKey::Make(R, BindingKey::Default);
if (B.lookup(Key)) {
const SubRegion *SR = cast<SubRegion>(R);
assert(SR->getAsOffset().getOffset() ==
SR->getSuperRegion()->getAsOffset().getOffset() &&
"A default value must come from a super-region");
B = removeSubRegionBindings(B, SR);
} else {
B = B.addBinding(Key, V);
}
return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
}
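// A minimal, self-contained illustration of the empty base class optimization
// that the FIXME above refers to (assuming a typical Itanium-ABI layout;
// other ABIs may differ):
//
//   struct Empty {};
//   struct Derived : Empty { int x; };
//   static_assert(sizeof(Derived) == sizeof(int),
//                 "the Empty base contributes no storage of its own");
//
// Because the base subobject may share its address with 'Derived::x', a
// binding created for the optimized-out base could clobber the field.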
/// Attempt to extract the fields of \p LCV and bind them to the struct region
/// \p R.
///
/// This path is used when it seems advantageous to "force" loading the values
/// within a LazyCompoundVal to bind memberwise to the struct region, rather
/// than using a Default binding at the base of the entire region. This is a
/// heuristic attempting to avoid building long chains of LazyCompoundVals.
///
/// \returns The updated store bindings, or \c None if binding non-lazily
/// would be too expensive.
Optional<RegionBindingsRef> tryBindSmallStruct(RegionBindingsConstRef B,
const TypedValueRegion *R,
const RecordDecl *RD,
nonloc::LazyCompoundVal LCV);
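// For instance (editorial sketch; the default limit of 2 comes from the
// constructor above): copying
//   struct Point { int x, y; } a, b;
//   b = a;
// may bind 'b.x' and 'b.y' memberwise, while a struct with more fields than
// the limit keeps a single LazyCompoundVal default binding at the base of 'b'.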
/// BindStruct - Bind a compound value to a structure.
RegionBindingsRef bindStruct(RegionBindingsConstRef B,
const TypedValueRegion* R, SVal V);
/// BindVector - Bind a compound value to a vector.
RegionBindingsRef bindVector(RegionBindingsConstRef B,
const TypedValueRegion* R, SVal V);
RegionBindingsRef bindArray(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal V);
/// Clears out all bindings in the given region and assigns a new value
/// as a Default binding.
RegionBindingsRef bindAggregate(RegionBindingsConstRef B,
const TypedRegion *R,
SVal DefaultVal);
/// \brief Create a new store with the specified binding removed.
/// \param ST the original store, that is the basis for the new store.
/// \param L the location whose binding should be removed.
StoreRef killBinding(Store ST, Loc L) override;
void incrementReferenceCount(Store store) override {
getRegionBindings(store).manualRetain();
}
/// If the StoreManager supports it, decrement the reference count of
/// the specified Store object. If the reference count hits 0, the memory
/// associated with the object is recycled.
void decrementReferenceCount(Store store) override {
getRegionBindings(store).manualRelease();
}
bool includedInBindings(Store store, const MemRegion *region) const override;
/// \brief Return the value bound to specified location in a given state.
///
/// The high level logic for this method is this:
/// getBinding (L)
/// if L has binding
/// return L's binding
/// else if L is in killset
/// return unknown
/// else
/// if L is on stack or heap
/// return undefined
/// else
/// return symbolic
SVal getBinding(Store S, Loc L, QualType T) override {
return getBinding(getRegionBindings(S), L, T);
}
Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
// Default bindings are always applied over a base region, so look up the
// base region's default binding; otherwise the lookup will fail when R
// is at an offset from R->getBaseRegion().
return B.getDefaultBinding(R->getBaseRegion());
}
SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());
SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R);
SVal getBindingForField(RegionBindingsConstRef B, const FieldRegion *R);
SVal getBindingForObjCIvar(RegionBindingsConstRef B, const ObjCIvarRegion *R);
SVal getBindingForVar(RegionBindingsConstRef B, const VarRegion *R);
SVal getBindingForLazySymbol(const TypedValueRegion *R);
SVal getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
const TypedValueRegion *R,
QualType Ty);
SVal getLazyBinding(const SubRegion *LazyBindingRegion,
RegionBindingsRef LazyBinding);
/// Get bindings for the values in a struct and return a CompoundVal, used
/// when doing struct copy:
/// struct s x, y;
/// x = y;
/// y's value is retrieved by this method.
SVal getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R);
SVal getBindingForArray(RegionBindingsConstRef B, const TypedValueRegion *R);
NonLoc createLazyBinding(RegionBindingsConstRef B, const TypedValueRegion *R);
/// Used to lazily generate derived symbols for bindings that are defined
/// implicitly by default bindings in a super region.
///
/// Note that callers may need to specially handle LazyCompoundVals, which
/// are returned as is in case the caller needs to treat them differently.
Optional<SVal> getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
const MemRegion *superR,
const TypedValueRegion *R,
QualType Ty);
/// Get the state and region whose binding this region \p R corresponds to.
///
/// If there is no lazy binding for \p R, the returned value will have a null
/// \c second. Note that a null pointer can represent a valid Store.
std::pair<Store, const SubRegion *>
findLazyBinding(RegionBindingsConstRef B, const SubRegion *R,
const SubRegion *originalRegion);
/// Returns the cached set of interesting SVals contained within a lazy
/// binding.
///
/// The precise value of "interesting" is determined for the purposes of
/// RegionStore's internal analysis. It must always contain all regions and
/// symbols, but may omit constants and other kinds of SVal.
const SValListTy &getInterestingValues(nonloc::LazyCompoundVal LCV);
//===------------------------------------------------------------------===//
// State pruning.
//===------------------------------------------------------------------===//
/// removeDeadBindings - Scans the RegionStore of 'state' for dead values.
/// It returns a new Store with these values removed.
StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx,
SymbolReaper& SymReaper) override;
//===------------------------------------------------------------------===//
// Region "extents".
//===------------------------------------------------------------------===//
// FIXME: This method will soon be eliminated; see the note in Store.h.
DefinedOrUnknownSVal getSizeInElements(ProgramStateRef state,
const MemRegion* R,
QualType EleTy) override;
//===------------------------------------------------------------------===//
// Utility methods.
//===------------------------------------------------------------------===//
RegionBindingsRef getRegionBindings(Store store) const {
return RegionBindingsRef(CBFactory,
static_cast<const RegionBindings::TreeTy*>(store),
RBFactory.getTreeFactory());
}
void print(Store store, raw_ostream &Out, const char* nl,
const char *sep) override;
void iterBindings(Store store, BindingsHandler& f) override {
RegionBindingsRef B = getRegionBindings(store);
for (RegionBindingsRef::iterator I = B.begin(), E = B.end(); I != E; ++I) {
const ClusterBindings &Cluster = I.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
const BindingKey &K = CI.getKey();
if (!K.isDirect())
continue;
if (const SubRegion *R = dyn_cast<SubRegion>(K.getRegion())) {
// FIXME: Possibly incorporate the offset?
if (!f.HandleBinding(*this, store, R, CI.getData()))
return;
}
}
}
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// RegionStore creation.
//===----------------------------------------------------------------------===//
std::unique_ptr<StoreManager>
ento::CreateRegionStoreManager(ProgramStateManager &StMgr) {
RegionStoreFeatures F = maximal_features_tag();
return llvm::make_unique<RegionStoreManager>(StMgr, F);
}
std::unique_ptr<StoreManager>
ento::CreateFieldsOnlyRegionStoreManager(ProgramStateManager &StMgr) {
RegionStoreFeatures F = minimal_features_tag();
F.enableFields(true);
return llvm::make_unique<RegionStoreManager>(StMgr, F);
}
//===----------------------------------------------------------------------===//
// Region Cluster analysis.
//===----------------------------------------------------------------------===//
namespace {
/// Used to determine which global regions are automatically included in the
/// initial worklist of a ClusterAnalysis.
enum GlobalsFilterKind {
/// Don't include any global regions.
GFK_None,
/// Only include system globals.
GFK_SystemOnly,
/// Include all global regions.
GFK_All
};
template <typename DERIVED>
class ClusterAnalysis {
protected:
typedef llvm::DenseMap<const MemRegion *, const ClusterBindings *> ClusterMap;
typedef const MemRegion * WorkListElement;
typedef SmallVector<WorkListElement, 10> WorkList;
llvm::SmallPtrSet<const ClusterBindings *, 16> Visited;
WorkList WL;
RegionStoreManager &RM;
ASTContext &Ctx;
SValBuilder &svalBuilder;
RegionBindingsRef B;
protected:
const ClusterBindings *getCluster(const MemRegion *R) {
return B.lookup(R);
}
/// Returns true if all clusters in the given memspace should be initially
/// included in the cluster analysis. Subclasses may provide their
/// own implementation.
bool includeEntireMemorySpace(const MemRegion *Base) {
return false;
}
public:
ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr,
RegionBindingsRef b)
: RM(rm), Ctx(StateMgr.getContext()),
svalBuilder(StateMgr.getSValBuilder()), B(std::move(b)) {}
RegionBindingsRef getRegionBindings() const { return B; }
bool isVisited(const MemRegion *R) {
return Visited.count(getCluster(R));
}
void GenerateClusters() {
// Scan the entire set of bindings and record the region clusters.
for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end();
RI != RE; ++RI){
const MemRegion *Base = RI.getKey();
const ClusterBindings &Cluster = RI.getData();
assert(!Cluster.isEmpty() && "Empty clusters should be removed");
static_cast<DERIVED*>(this)->VisitAddedToCluster(Base, Cluster);
// If the base's memspace should be entirely invalidated, add the cluster
// to the work list up front.
if (static_cast<DERIVED*>(this)->includeEntireMemorySpace(Base))
AddToWorkList(WorkListElement(Base), &Cluster);
}
}
bool AddToWorkList(WorkListElement E, const ClusterBindings *C) {
if (C && !Visited.insert(C).second)
return false;
WL.push_back(E);
return true;
}
bool AddToWorkList(const MemRegion *R) {
return static_cast<DERIVED*>(this)->AddToWorkList(R);
}
void RunWorkList() {
while (!WL.empty()) {
WorkListElement E = WL.pop_back_val();
const MemRegion *BaseR = E;
static_cast<DERIVED*>(this)->VisitCluster(BaseR, getCluster(BaseR));
}
}
void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C) {}
void VisitCluster(const MemRegion *baseR, const ClusterBindings *C) {}
void VisitCluster(const MemRegion *BaseR, const ClusterBindings *C,
bool Flag) {
static_cast<DERIVED*>(this)->VisitCluster(BaseR, C);
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Binding invalidation.
//===----------------------------------------------------------------------===//
bool RegionStoreManager::scanReachableSymbols(Store S, const MemRegion *R,
ScanReachableSymbols &Callbacks) {
assert(R == R->getBaseRegion() && "Should only be called for base regions");
RegionBindingsRef B = getRegionBindings(S);
const ClusterBindings *Cluster = B.lookup(R);
if (!Cluster)
return true;
for (ClusterBindings::iterator RI = Cluster->begin(), RE = Cluster->end();
RI != RE; ++RI) {
if (!Callbacks.scan(RI.getData()))
return false;
}
return true;
}
static inline bool isUnionField(const FieldRegion *FR) {
return FR->getDecl()->getParent()->isUnion();
}
typedef SmallVector<const FieldDecl *, 8> FieldVector;
static void getSymbolicOffsetFields(BindingKey K, FieldVector &Fields) {
assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys");
const MemRegion *Base = K.getConcreteOffsetRegion();
const MemRegion *R = K.getRegion();
while (R != Base) {
if (const FieldRegion *FR = dyn_cast<FieldRegion>(R))
if (!isUnionField(FR))
Fields.push_back(FR->getDecl());
R = cast<SubRegion>(R)->getSuperRegion();
}
}
static bool isCompatibleWithFields(BindingKey K, const FieldVector &Fields) {
assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys");
if (Fields.empty())
return true;
FieldVector FieldsInBindingKey;
getSymbolicOffsetFields(K, FieldsInBindingKey);
ptrdiff_t Delta = FieldsInBindingKey.size() - Fields.size();
if (Delta >= 0)
return std::equal(FieldsInBindingKey.begin() + Delta,
FieldsInBindingKey.end(),
Fields.begin());
else
return std::equal(FieldsInBindingKey.begin(), FieldsInBindingKey.end(),
Fields.begin() - Delta);
}
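// Example (hedged, informal notation): if the region being invalidated is
// 'points[i].y', the field suffix to match is [y]. A symbolic-offset key for
// 'points[j].y' produces the same suffix and is compatible; a key for
// 'points[j].x' produces [x] and is not.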
/// Collects all bindings in \p Cluster that may refer to bindings within
/// \p Top.
///
/// Each binding is a pair whose \c first is the key (a BindingKey) and whose
/// \c second is the value (an SVal).
///
/// The \p IncludeAllDefaultBindings parameter specifies whether to include
/// default bindings that may extend beyond \p Top itself, e.g. if \p Top is
/// an aggregate within a larger aggregate with a default binding.
static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
SValBuilder &SVB, const ClusterBindings &Cluster,
const SubRegion *Top, BindingKey TopKey,
bool IncludeAllDefaultBindings) {
FieldVector FieldsInSymbolicSubregions;
if (TopKey.hasSymbolicOffset()) {
getSymbolicOffsetFields(TopKey, FieldsInSymbolicSubregions);
Top = cast<SubRegion>(TopKey.getConcreteOffsetRegion());
TopKey = BindingKey::Make(Top, BindingKey::Default);
}
// Find the length (in bits) of the region being invalidated.
uint64_t Length = UINT64_MAX;
SVal Extent = Top->getExtent(SVB);
if (Optional<nonloc::ConcreteInt> ExtentCI =
Extent.getAs<nonloc::ConcreteInt>()) {
const llvm::APSInt &ExtentInt = ExtentCI->getValue();
assert(ExtentInt.isNonNegative() || ExtentInt.isUnsigned());
// Extents are in bytes but region offsets are in bits. Be careful!
Length = ExtentInt.getLimitedValue() * SVB.getContext().getCharWidth();
} else if (const FieldRegion *FR = dyn_cast<FieldRegion>(Top)) {
if (FR->getDecl()->isBitField())
Length = FR->getDecl()->getBitWidthValue(SVB.getContext());
}
for (ClusterBindings::iterator I = Cluster.begin(), E = Cluster.end();
I != E; ++I) {
BindingKey NextKey = I.getKey();
if (NextKey.getRegion() == TopKey.getRegion()) {
// FIXME: This doesn't catch the case where we're really invalidating a
// region with a symbolic offset. Example:
// R: points[i].y
// Next: points[0].x
if (NextKey.getOffset() > TopKey.getOffset() &&
NextKey.getOffset() - TopKey.getOffset() < Length) {
// Case 1: The next binding is inside the region we're invalidating.
// Include it.
Bindings.push_back(*I);
} else if (NextKey.getOffset() == TopKey.getOffset()) {
// Case 2: The next binding is at the same offset as the region we're
// invalidating. In this case, we need to leave default bindings alone,
// since they may be providing a default value for regions beyond what
// we're invalidating.
// FIXME: This is probably incorrect; consider invalidating an outer
// struct whose first field is bound to a LazyCompoundVal.
if (IncludeAllDefaultBindings || NextKey.isDirect())
Bindings.push_back(*I);
}
} else if (NextKey.hasSymbolicOffset()) {
const MemRegion *Base = NextKey.getConcreteOffsetRegion();
if (Top->isSubRegionOf(Base)) {
// Case 3: The next key is symbolic and we just changed something within
// its concrete region. We don't know if the binding is still valid, so
// we'll be conservative and include it.
if (IncludeAllDefaultBindings || NextKey.isDirect())
if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
Bindings.push_back(*I);
} else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Base)) {
// Case 4: The next key is symbolic, but we changed a known
// super-region. In this case the binding is certainly included.
if (Top == Base || BaseSR->isSubRegionOf(Top))
if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
Bindings.push_back(*I);
}
}
}
}
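// A small worked example (editorial; informal notation). Suppose \p Top is
// 'outer.inner', where 'inner' is the first member, and the cluster holds:
//   (outer, offset 0, default)  -- same offset as Top: collected only when it
//                                  is direct or IncludeAllDefaultBindings
//   (outer.inner.a, direct)     -- strictly inside Top: always collected
//   (outer.later, direct)       -- beyond Top's extent: skipped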
static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
SValBuilder &SVB, const ClusterBindings &Cluster,
const SubRegion *Top, bool IncludeAllDefaultBindings) {
collectSubRegionBindings(Bindings, SVB, Cluster, Top,
BindingKey::Make(Top, BindingKey::Default),
IncludeAllDefaultBindings);
}
RegionBindingsRef
RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B,
const SubRegion *Top) {
BindingKey TopKey = BindingKey::Make(Top, BindingKey::Default);
const MemRegion *ClusterHead = TopKey.getBaseRegion();
if (Top == ClusterHead) {
// We can remove an entire cluster's bindings all in one go.
return B.remove(Top);
}
const ClusterBindings *Cluster = B.lookup(ClusterHead);
if (!Cluster) {
// If we're invalidating a region with a symbolic offset, we need to make
// sure we don't treat the base region as uninitialized anymore.
if (TopKey.hasSymbolicOffset()) {
const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
return B.addBinding(Concrete, BindingKey::Default, UnknownVal());
}
return B;
}
SmallVector<BindingPair, 32> Bindings;
collectSubRegionBindings(Bindings, svalBuilder, *Cluster, Top, TopKey,
/*IncludeAllDefaultBindings=*/false);
ClusterBindingsRef Result(*Cluster, CBFactory);
for (SmallVectorImpl<BindingPair>::const_iterator I = Bindings.begin(),
E = Bindings.end();
I != E; ++I)
Result = Result.remove(I->first);
// If we're invalidating a region with a symbolic offset, we need to make sure
// we don't treat the base region as uninitialized anymore.
// FIXME: This isn't very precise; see the example in
// collectSubRegionBindings.
if (TopKey.hasSymbolicOffset()) {
const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
Result = Result.add(BindingKey::Make(Concrete, BindingKey::Default),
UnknownVal());
}
if (Result.isEmpty())
return B.remove(ClusterHead);
return B.add(ClusterHead, Result.asImmutableMap());
}
namespace {
class invalidateRegionsWorker : public ClusterAnalysis<invalidateRegionsWorker>
{
const Expr *Ex;
unsigned Count;
const LocationContext *LCtx;
InvalidatedSymbols &IS;
RegionAndSymbolInvalidationTraits &ITraits;
StoreManager::InvalidatedRegions *Regions;
GlobalsFilterKind GlobalsFilter;
public:
invalidateRegionsWorker(RegionStoreManager &rm,
ProgramStateManager &stateMgr,
RegionBindingsRef b,
const Expr *ex, unsigned count,
const LocationContext *lctx,
InvalidatedSymbols &is,
RegionAndSymbolInvalidationTraits &ITraitsIn,
StoreManager::InvalidatedRegions *r,
GlobalsFilterKind GFK)
: ClusterAnalysis<invalidateRegionsWorker>(rm, stateMgr, b),
Ex(ex), Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r),
GlobalsFilter(GFK) {}
void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
void VisitBinding(SVal V);
using ClusterAnalysis::AddToWorkList;
bool AddToWorkList(const MemRegion *R);
/// Returns true if all clusters in the memory space for \p Base should
/// be invalidated.
bool includeEntireMemorySpace(const MemRegion *Base);
/// Returns true if the memory space of the given region is one of the global
/// regions specially included at the start of invalidation.
bool isInitiallyIncludedGlobalRegion(const MemRegion *R);
};
} // end anonymous namespace
bool invalidateRegionsWorker::AddToWorkList(const MemRegion *R) {
bool doNotInvalidateSuperRegion = ITraits.hasTrait(
R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
const MemRegion *BaseR = doNotInvalidateSuperRegion ? R : R->getBaseRegion();
return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}
void invalidateRegionsWorker::VisitBinding(SVal V) {
// A symbol? Mark it touched by the invalidation.
if (SymbolRef Sym = V.getAsSymbol())
IS.insert(Sym);
if (const MemRegion *R = V.getAsRegion()) {
AddToWorkList(R);
return;
}
// Is it a LazyCompoundVal? All references get invalidated as well.
if (Optional<nonloc::LazyCompoundVal> LCS =
V.getAs<nonloc::LazyCompoundVal>()) {
const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS);
for (RegionStoreManager::SValListTy::const_iterator I = Vals.begin(),
E = Vals.end();
I != E; ++I)
VisitBinding(*I);
return;
}
}
void invalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
const ClusterBindings *C) {
bool PreserveRegionsContents =
ITraits.hasTrait(baseR,
RegionAndSymbolInvalidationTraits::TK_PreserveContents);
if (C) {
for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; ++I)
VisitBinding(I.getData());
// Invalidate the region's contents.
if (!PreserveRegionsContents)
B = B.remove(baseR);
}
// BlockDataRegion? If so, invalidate captured variables that are passed
// by reference.
if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(baseR)) {
for (BlockDataRegion::referenced_vars_iterator
BI = BR->referenced_vars_begin(), BE = BR->referenced_vars_end();
BI != BE; ++BI) {
const VarRegion *VR = BI.getCapturedRegion();
const VarDecl *VD = VR->getDecl();
if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) {
AddToWorkList(VR);
}
else if (Loc::isLocType(VR->getValueType())) {
// Map the current bindings to a Store to retrieve the value
// of the binding. If that binding itself is a region, we should
// invalidate that region. This is because a block may capture
// a pointer value, but the thing pointed to by that pointer may
// get invalidated.
SVal V = RM.getBinding(B, loc::MemRegionVal(VR));
if (Optional<Loc> L = V.getAs<Loc>()) {
if (const MemRegion *LR = L->getAsRegion())
AddToWorkList(LR);
}
}
}
return;
}
// Symbolic region?
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR))
IS.insert(SR->getSymbol());
// Nothing else should be done in the case when we preserve the region's contents.
if (PreserveRegionsContents)
return;
// Otherwise, we have a normal data region. Record that we touched the region.
if (Regions)
Regions->push_back(baseR);
if (isa<AllocaRegion>(baseR) || isa<SymbolicRegion>(baseR)) {
// Invalidate the region by setting its default value to a
// conjured symbol. The type of the symbol is irrelevant.
DefinedOrUnknownSVal V =
svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count);
B = B.addBinding(baseR, BindingKey::Default, V);
return;
}
if (!baseR->isBoundable())
return;
const TypedValueRegion *TR = cast<TypedValueRegion>(baseR);
QualType T = TR->getValueType();
if (isInitiallyIncludedGlobalRegion(baseR)) {
// If the region is a global and we are invalidating all globals,
// erasing the entry is good enough. This causes all globals to be lazily
// symbolicated from the same base symbol.
return;
}
if (T->isStructureOrClassType()) {
// Invalidate the region by setting its default value to a
// conjured symbol. The type of the symbol is irrelevant.
DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
Ctx.IntTy, Count);
B = B.addBinding(baseR, BindingKey::Default, V);
return;
}
if (const ArrayType *AT = Ctx.getAsArrayType(T)) {
bool doNotInvalidateSuperRegion = ITraits.hasTrait(
baseR,
RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
if (doNotInvalidateSuperRegion) {
// We are not doing blank invalidation of the whole array region so we
// have to manually invalidate each element.
Optional<uint64_t> NumElements;
// Compute lower and upper offsets for region within array.
if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
NumElements = CAT->getSize().getZExtValue();
if (!NumElements) // We are not dealing with a constant size array
goto conjure_default;
QualType ElementTy = AT->getElementType();
uint64_t ElemSize = Ctx.getTypeSize(ElementTy);
const RegionOffset &RO = baseR->getAsOffset();
const MemRegion *SuperR = baseR->getBaseRegion();
if (RO.hasSymbolicOffset()) {
// If the base region has a symbolic offset,
// we revert to invalidating the super region.
if (SuperR)
AddToWorkList(SuperR);
goto conjure_default;
}
uint64_t LowerOffset = RO.getOffset();
uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize;
bool UpperOverflow = UpperOffset < LowerOffset;
// Invalidate regions which are within array boundaries,
// or have a symbolic offset.
if (!SuperR)
goto conjure_default;
const ClusterBindings *C = B.lookup(SuperR);
if (!C)
goto conjure_default;
for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E;
++I) {
const BindingKey &BK = I.getKey();
Optional<uint64_t> ROffset =
BK.hasSymbolicOffset() ? Optional<uint64_t>() : BK.getOffset();
// Check that the offset is not symbolic and is within the array's boundaries.
// Handles arrays of 0 elements and of 0-sized elements as well.
if (!ROffset ||
((*ROffset >= LowerOffset && *ROffset < UpperOffset) ||
(UpperOverflow &&
(*ROffset >= LowerOffset || *ROffset < UpperOffset)) ||
(LowerOffset == UpperOffset && *ROffset == LowerOffset))) {
B = B.removeBinding(I.getKey());
// Bound symbolic regions need to be invalidated for dead symbol
// detection.
SVal V = I.getData();
const MemRegion *R = V.getAsRegion();
if (R && isa<SymbolicRegion>(R))
VisitBinding(V);
}
}
}
conjure_default:
// Set the default value of the array to a conjured symbol.
DefinedOrUnknownSVal V =
svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
AT->getElementType(), Count);
B = B.addBinding(baseR, BindingKey::Default, V);
return;
}
DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx,
T, Count);
assert(SymbolManager::canSymbolicate(T) || V.isUnknown());
B = B.addBinding(baseR, BindingKey::Direct, V);
}
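// Worked example (editorial; a 32-bit 'int' is assumed) for the
// TK_DoNotInvalidateSuperRegion path above: for 'int a[2]' the bounds are
// LowerOffset == 0 and UpperOffset == 2 * 32 == 64 bits, so a binding for
// a[1] at bit offset 32 is removed, while a sibling binding in the same
// super-region at offset 64 or beyond survives; the array itself then
// receives a conjured default value.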
bool invalidateRegionsWorker::isInitiallyIncludedGlobalRegion(
const MemRegion *R) {
switch (GlobalsFilter) {
case GFK_None:
return false;
case GFK_SystemOnly:
return isa<GlobalSystemSpaceRegion>(R->getMemorySpace());
case GFK_All:
return isa<NonStaticGlobalSpaceRegion>(R->getMemorySpace());
}
llvm_unreachable("unknown globals filter");
}
bool invalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) {
if (isInitiallyIncludedGlobalRegion(Base))
return true;
const MemSpaceRegion *MemSpace = Base->getMemorySpace();
return ITraits.hasTrait(MemSpace,
RegionAndSymbolInvalidationTraits::TK_EntireMemSpace);
}
RegionBindingsRef
RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K,
const Expr *Ex,
unsigned Count,
const LocationContext *LCtx,
RegionBindingsRef B,
InvalidatedRegions *Invalidated) {
// Bind the globals memory space to a new symbol that we will use to derive
// the bindings for all globals.
const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K);
SVal V = svalBuilder.conjureSymbolVal(/* SymbolTag = */ (const void*) GS, Ex, LCtx,
/* type does not matter */ Ctx.IntTy,
Count);
B = B.removeBinding(GS)
.addBinding(BindingKey::Make(GS, BindingKey::Default), V);
// Even if there are no bindings in the global scope, we still need to
// record that we touched it.
if (Invalidated)
Invalidated->push_back(GS);
return B;
}
void RegionStoreManager::populateWorkList(invalidateRegionsWorker &W,
ArrayRef<SVal> Values,
InvalidatedRegions *TopLevelRegions) {
for (ArrayRef<SVal>::iterator I = Values.begin(),
E = Values.end(); I != E; ++I) {
SVal V = *I;
if (Optional<nonloc::LazyCompoundVal> LCS =
V.getAs<nonloc::LazyCompoundVal>()) {
const SValListTy &Vals = getInterestingValues(*LCS);
for (SValListTy::const_iterator I = Vals.begin(),
E = Vals.end(); I != E; ++I) {
// Note: the last argument is false here because these are
// non-top-level regions.
if (const MemRegion *R = (*I).getAsRegion())
W.AddToWorkList(R);
}
continue;
}
if (const MemRegion *R = V.getAsRegion()) {
if (TopLevelRegions)
TopLevelRegions->push_back(R);
W.AddToWorkList(R);
continue;
}
}
}
StoreRef
RegionStoreManager::invalidateRegions(Store store,
ArrayRef<SVal> Values,
const Expr *Ex, unsigned Count,
const LocationContext *LCtx,
const CallEvent *Call,
InvalidatedSymbols &IS,
RegionAndSymbolInvalidationTraits &ITraits,
InvalidatedRegions *TopLevelRegions,
InvalidatedRegions *Invalidated) {
GlobalsFilterKind GlobalsFilter;
if (Call) {
if (Call->isInSystemHeader())
GlobalsFilter = GFK_SystemOnly;
else
GlobalsFilter = GFK_All;
} else {
GlobalsFilter = GFK_None;
}
RegionBindingsRef B = getRegionBindings(store);
invalidateRegionsWorker W(*this, StateMgr, B, Ex, Count, LCtx, IS, ITraits,
Invalidated, GlobalsFilter);
// Scan the bindings and generate the clusters.
W.GenerateClusters();
// Add the regions to the worklist.
populateWorkList(W, Values, TopLevelRegions);
W.RunWorkList();
// Return the new bindings.
B = W.getRegionBindings();
// For calls, determine which global regions should be invalidated and
// invalidate them. (Note that function-static and immutable globals are never
// invalidated by this.)
// TODO: This could possibly be more precise with modules.
switch (GlobalsFilter) {
case GFK_All:
B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind,
Ex, Count, LCtx, B, Invalidated);
// FALLTHROUGH
case GFK_SystemOnly:
B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind,
Ex, Count, LCtx, B, Invalidated);
// FALLTHROUGH
case GFK_None:
break;
}
return StoreRef(B.asStore(), *this);
}
//===----------------------------------------------------------------------===//
// Extents for regions.
//===----------------------------------------------------------------------===//
DefinedOrUnknownSVal
RegionStoreManager::getSizeInElements(ProgramStateRef state,
const MemRegion *R,
QualType EleTy) {
SVal Size = cast<SubRegion>(R)->getExtent(svalBuilder);
const llvm::APSInt *SizeInt = svalBuilder.getKnownValue(state, Size);
if (!SizeInt)
return UnknownVal();
CharUnits RegionSize = CharUnits::fromQuantity(SizeInt->getSExtValue());
if (Ctx.getAsVariableArrayType(EleTy)) {
// FIXME: We need to track extra state to properly record the size
// of VLAs. Returning UnknownVal here, however, is a stop-gap so that
// we don't have a divide-by-zero below.
return UnknownVal();
}
CharUnits EleSize = Ctx.getTypeSizeInChars(EleTy);
// If a variable is reinterpreted as a type that doesn't fit into a larger
// type evenly, round it down.
// This is a signed value, since it's used in arithmetic with signed indices.
return svalBuilder.makeIntVal(RegionSize / EleSize, false);
}
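// Worked example (editorial; a 4-byte 'int' is assumed): viewing
// 'char buf[10]' as an array of 'int' gives RegionSize == 10 bytes and
// EleSize == 4 bytes, so the result is 10 / 4 == 2 -- the CharUnits division
// rounds down, as the comment above notes.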
//===----------------------------------------------------------------------===//
// Location and region casting.
//===----------------------------------------------------------------------===//
/// ArrayToPointer - Emulates the "decay" of an array to a pointer
/// type. 'Array' represents the lvalue of the array being decayed
/// to a pointer, and the returned SVal represents the decayed
/// version of that lvalue (i.e., a pointer to the first element of
/// the array). This is called by ExprEngine when evaluating casts
/// from arrays to pointers.
SVal RegionStoreManager::ArrayToPointer(Loc Array, QualType T) {
if (Array.getAs<loc::ConcreteInt>())
return Array;
if (!Array.getAs<loc::MemRegionVal>())
return UnknownVal();
const SubRegion *R =
cast<SubRegion>(Array.castAs<loc::MemRegionVal>().getRegion());
NonLoc ZeroIdx = svalBuilder.makeZeroArrayIndex();
return loc::MemRegionVal(MRMgr.getElementRegion(T, ZeroIdx, R, Ctx));
}
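// For instance (illustrative): given 'int a[4];', evaluating the decay in
// 'int *p = a;' turns the lvalue of 'a' into the element lvalue '&a[0]',
// i.e. an ElementRegion with index 0 over the region for 'a'.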
//===----------------------------------------------------------------------===//
// Loading values from regions.
//===----------------------------------------------------------------------===//
SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) {
assert(!L.getAs<UnknownVal>() && "location unknown");
assert(!L.getAs<UndefinedVal>() && "location undefined");
// For access to concrete addresses, return UnknownVal. Checks
// for null dereferences (and similar errors) are done by checkers, not
// the Store.
// FIXME: We can consider lazily symbolicating such memory, but we really
// should defer this until we can reason easily about symbolicating arrays
// of bytes.
if (L.getAs<loc::ConcreteInt>()) {
return UnknownVal();
}
if (!L.getAs<loc::MemRegionVal>()) {
return UnknownVal();
}
const MemRegion *MR = L.castAs<loc::MemRegionVal>().getRegion();
if (isa<BlockDataRegion>(MR)) {
return UnknownVal();
}
if (isa<AllocaRegion>(MR) ||
isa<SymbolicRegion>(MR) ||
isa<CodeTextRegion>(MR)) {
if (T.isNull()) {
if (const TypedRegion *TR = dyn_cast<TypedRegion>(MR))
T = TR->getLocationType();
else {
const SymbolicRegion *SR = cast<SymbolicRegion>(MR);
T = SR->getSymbol()->getType();
}
}
MR = GetElementZeroRegion(cast<SubRegion>(MR), T);
}
// FIXME: Perhaps this method should just take a 'const MemRegion*' argument
// instead of 'Loc', and have the other Loc cases handled at a higher level.
const TypedValueRegion *R = cast<TypedValueRegion>(MR);
QualType RTy = R->getValueType();
// FIXME: we do not yet model the parts of a complex type, so treat the
// whole thing as "unknown".
if (RTy->isAnyComplexType())
return UnknownVal();
// FIXME: We should eventually handle funny addressing. e.g.:
//
// int x = ...;
// int *p = &x;
// char *q = (char*) p;
// char c = *q; // returns the first byte of 'x'.
//
// Such funny addressing will occur due to layering of regions.
if (RTy->isStructureOrClassType())
return getBindingForStruct(B, R);
// FIXME: Handle unions.
if (RTy->isUnionType())
return createLazyBinding(B, R);
if (RTy->isArrayType()) {
if (RTy->isConstantArrayType())
return getBindingForArray(B, R);
else
return UnknownVal();
}
// FIXME: handle Vector types.
if (RTy->isVectorType())
return UnknownVal();
if (const FieldRegion* FR = dyn_cast<FieldRegion>(R))
return CastRetrievedVal(getBindingForField(B, FR), FR, T, false);
if (const ElementRegion* ER = dyn_cast<ElementRegion>(R)) {
// FIXME: Here we actually perform an implicit conversion from the loaded
// value to the element type. Eventually we want to compose these values
// more intelligently. For example, an 'element' can encompass multiple
// bound regions (e.g., several bound bytes), or could be a subset of
// a larger value.
return CastRetrievedVal(getBindingForElement(B, ER), ER, T, false);
}
if (const ObjCIvarRegion *IVR = dyn_cast<ObjCIvarRegion>(R)) {
// FIXME: Here we actually perform an implicit conversion from the loaded
// value to the ivar type. What we should model is stores to ivars
// that blow past the extent of the ivar. If the address of the ivar is
// reinterpreted, it is possible we stored a different value that could
// fit within the ivar. Either we need to cast these when storing them
// or reinterpret them lazily (as we do here).
return CastRetrievedVal(getBindingForObjCIvar(B, IVR), IVR, T, false);
}
if (const VarRegion *VR = dyn_cast<VarRegion>(R)) {
// FIXME: Here we actually perform an implicit conversion from the loaded
// value to the variable type. What we should model is stores to variables
// that blow past the extent of the variable. If the address of the
// variable is reinterpreted, it is possible we stored a different value
// that could fit within the variable. Either we need to cast these when
// storing them or reinterpret them lazily (as we do here).
return CastRetrievedVal(getBindingForVar(B, VR), VR, T, false);
}
const SVal *V = B.lookup(R, BindingKey::Direct);
// Check if the region has a binding.
if (V)
return *V;
// The location does not have a bound value. This means that it has
// the value it had upon its creation and/or entry to the analyzed
// function/method. These are either symbolic values or 'undefined'.
if (R->hasStackNonParametersStorage()) {
// All stack variables are considered to have undefined values
// upon creation. All heap allocated blocks are considered to
// have undefined values as well unless they are explicitly bound
// to specific values.
return UndefinedVal();
}
// All other values are symbolic.
return svalBuilder.getRegionValueSymbolVal(R);
}
static QualType getUnderlyingType(const SubRegion *R) {
QualType RegionTy;
if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(R))
RegionTy = TVR->getValueType();
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
RegionTy = SR->getSymbol()->getType();
return RegionTy;
}
/// Checks to see if store \p B has a lazy binding for region \p R.
///
/// If \p AllowSubregionBindings is \c false, a lazy binding will be rejected
/// if there are additional bindings within \p R.
///
/// Note that unlike RegionStoreManager::findLazyBinding, this will not search
/// for lazy bindings for super-regions of \p R.
static Optional<nonloc::LazyCompoundVal>
getExistingLazyBinding(SValBuilder &SVB, RegionBindingsConstRef B,
const SubRegion *R, bool AllowSubregionBindings) {
Optional<SVal> V = B.getDefaultBinding(R);
if (!V)
return None;
Optional<nonloc::LazyCompoundVal> LCV = V->getAs<nonloc::LazyCompoundVal>();
if (!LCV)
return None;
// If the LCV is for a subregion, the types might not match, and we shouldn't
// reuse the binding.
QualType RegionTy = getUnderlyingType(R);
if (!RegionTy.isNull() &&
!RegionTy->isVoidPointerType()) {
QualType SourceRegionTy = LCV->getRegion()->getValueType();
if (!SVB.getContext().hasSameUnqualifiedType(RegionTy, SourceRegionTy))
return None;
}
if (!AllowSubregionBindings) {
// If there are any other bindings within this region, we shouldn't reuse
// the top-level binding.
SmallVector<BindingPair, 16> Bindings;
collectSubRegionBindings(Bindings, SVB, *B.lookup(R->getBaseRegion()), R,
/*IncludeAllDefaultBindings=*/true);
if (Bindings.size() > 1)
return None;
}
return *LCV;
}
std::pair<Store, const SubRegion *>
RegionStoreManager::findLazyBinding(RegionBindingsConstRef B,
const SubRegion *R,
const SubRegion *originalRegion) {
if (originalRegion != R) {
if (Optional<nonloc::LazyCompoundVal> V =
getExistingLazyBinding(svalBuilder, B, R, true))
return std::make_pair(V->getStore(), V->getRegion());
}
typedef std::pair<Store, const SubRegion *> StoreRegionPair;
StoreRegionPair Result = StoreRegionPair();
if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
Result = findLazyBinding(B, cast<SubRegion>(ER->getSuperRegion()),
originalRegion);
if (Result.second)
Result.second = MRMgr.getElementRegionWithSuper(ER, Result.second);
} else if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) {
Result = findLazyBinding(B, cast<SubRegion>(FR->getSuperRegion()),
originalRegion);
if (Result.second)
Result.second = MRMgr.getFieldRegionWithSuper(FR, Result.second);
} else if (const CXXBaseObjectRegion *BaseReg =
dyn_cast<CXXBaseObjectRegion>(R)) {
// C++ base object region is another kind of region that we should blast
// through to look for a lazy compound value. It is like a field region.
Result = findLazyBinding(B, cast<SubRegion>(BaseReg->getSuperRegion()),
originalRegion);
if (Result.second)
Result.second = MRMgr.getCXXBaseObjectRegionWithSuper(BaseReg,
Result.second);
}
return Result;
}
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
const ElementRegion* R) {
// We do not currently model bindings of the CompoundLiteralRegion.
if (isa<CompoundLiteralRegion>(R->getBaseRegion()))
return UnknownVal();
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
const MemRegion* superR = R->getSuperRegion();
// Check if the region is an element region of a string literal.
if (const StringRegion *StrR=dyn_cast<StringRegion>(superR)) {
// FIXME: Handle loads from strings where the literal is treated as
// an integer, e.g., *((unsigned int*)"hello")
QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
return UnknownVal();
const StringLiteral *Str = StrR->getStringLiteral();
SVal Idx = R->getIndex();
if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) {
int64_t i = CI->getValue().getSExtValue();
// Abort on string underrun. This can happen with arbitrary
// clients of getBindingForElement().
if (i < 0)
return UndefinedVal();
int64_t length = Str->getLength();
// Technically, only i == length is guaranteed to be null.
// However, such overflows should be caught before reaching this point;
// the only time such an access would be made is if a string literal was
// used to initialize a larger array.
char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
return svalBuilder.makeIntVal(c, T);
}
}
// Check for loads from a code text region. For such loads, just give up.
if (isa<CodeTextRegion>(superR))
return UnknownVal();
// Handle the case where we are indexing into a larger scalar object.
// For example, this handles:
// int x = ...
// char *y = &x;
// return *y;
// FIXME: This is a hack, and doesn't do anything really intelligent yet.
const RegionRawOffset &O = R->getAsArrayOffset();
// If we cannot reason about the offset, return an unknown value.
if (!O.getRegion())
return UnknownVal();
if (const TypedValueRegion *baseR =
dyn_cast_or_null<TypedValueRegion>(O.getRegion())) {
QualType baseT = baseR->getValueType();
if (baseT->isScalarType()) {
QualType elemT = R->getElementType();
if (elemT->isScalarType()) {
if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) {
if (const Optional<SVal> &V = B.getDirectBinding(superR)) {
if (SymbolRef parentSym = V->getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
if (V->isUnknownOrUndef())
return *V;
// Other cases: give up. We are indexing into a larger object
// that has some value, but we don't know how to handle that yet.
return UnknownVal();
}
}
}
}
}
return getBindingForFieldOrElementCommon(B, R, R->getElementType());
}
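// A hedged sketch of the string-literal path above:
//   const char s[8] = "hi";  // literal length == 2
//   char a = s[0];           // 'h': read from the literal itself
//   char b = s[2];           // '\0': i == length, the implicit terminator
//   char c = s[5];           // '\0': past the literal; the larger array the
//                            // literal initialized is zero-filled
// A negative index, by contrast, returns UndefinedVal (string underrun).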
SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
const FieldRegion* R) {
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
QualType Ty = R->getValueType();
return getBindingForFieldOrElementCommon(B, R, Ty);
}
Optional<SVal>
RegionStoreManager::getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
const MemRegion *superR,
const TypedValueRegion *R,
QualType Ty) {
if (const Optional<SVal> &D = B.getDefaultBinding(superR)) {
const SVal &val = D.getValue();
if (SymbolRef parentSym = val.getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
if (val.isZeroConstant())
return svalBuilder.makeZeroVal(Ty);
if (val.isUnknownOrUndef())
return val;
// Lazy bindings are usually handled through getExistingLazyBinding().
// We should unify these two code paths at some point.
if (val.getAs<nonloc::LazyCompoundVal>() ||
val.getAs<nonloc::CompoundVal>())
return val;
llvm_unreachable("Unknown default value");
}
return None;
}
SVal RegionStoreManager::getLazyBinding(const SubRegion *LazyBindingRegion,
RegionBindingsRef LazyBinding) {
SVal Result;
if (const ElementRegion *ER = dyn_cast<ElementRegion>(LazyBindingRegion))
Result = getBindingForElement(LazyBinding, ER);
else
Result = getBindingForField(LazyBinding,
cast<FieldRegion>(LazyBindingRegion));
// FIXME: This is a hack to deal with RegionStore's inability to distinguish a
// default value for /part/ of an aggregate from a default value for the
// /entire/ aggregate. The most common case of this is when struct Outer
// has as its first member a struct Inner, which is copied in from a stack
// variable. In this case, even if the Outer's default value is symbolic, 0,
// or unknown, it gets overridden by the Inner's default value of undefined.
//
// This is a general problem -- if the Inner is zero-initialized, the Outer
// will now look zero-initialized. The proper way to solve this is with a
// new version of RegionStore that tracks the extent of a binding as well
// as the offset.
//
// This hack only takes care of the undefined case because that can very
// quickly result in a warning.
if (Result.isUndef())
Result = UnknownVal();
return Result;
}
SVal
RegionStoreManager::getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
const TypedValueRegion *R,
QualType Ty) {
// At this point we have already checked in either getBindingForElement or
// getBindingForField if 'R' has a direct binding.
// Lazy binding?
Store lazyBindingStore = nullptr;
const SubRegion *lazyBindingRegion = nullptr;
std::tie(lazyBindingStore, lazyBindingRegion) = findLazyBinding(B, R, R);
if (lazyBindingRegion)
return getLazyBinding(lazyBindingRegion,
getRegionBindings(lazyBindingStore));
// Record whether or not we see a symbolic index. That can be completely
// out of the scope of our lookup.
bool hasSymbolicIndex = false;
// FIXME: This is a hack to deal with RegionStore's inability to distinguish a
// default value for /part/ of an aggregate from a default value for the
// /entire/ aggregate. The most common case of this is when struct Outer
// has as its first member a struct Inner, which is copied in from a stack
// variable. In this case, even if the Outer's default value is symbolic, 0,
// or unknown, it gets overridden by the Inner's default value of undefined.
//
// This is a general problem -- if the Inner is zero-initialized, the Outer
// will now look zero-initialized. The proper way to solve this is with a
// new version of RegionStore that tracks the extent of a binding as well
// as the offset.
//
// This hack only takes care of the undefined case because that can very
// quickly result in a warning.
bool hasPartialLazyBinding = false;
const SubRegion *SR = dyn_cast<SubRegion>(R);
while (SR) {
const MemRegion *Base = SR->getSuperRegion();
if (Optional<SVal> D = getBindingForDerivedDefaultValue(B, Base, R, Ty)) {
if (D->getAs<nonloc::LazyCompoundVal>()) {
hasPartialLazyBinding = true;
break;
}
return *D;
}
if (const ElementRegion *ER = dyn_cast<ElementRegion>(Base)) {
NonLoc index = ER->getIndex();
if (!index.isConstant())
hasSymbolicIndex = true;
}
// If our super region is a field or element itself, walk up the region
// hierarchy to see if there is a default value installed in an ancestor.
SR = dyn_cast<SubRegion>(Base);
}
if (R->hasStackNonParametersStorage()) {
if (isa<ElementRegion>(R)) {
// Currently we don't reason specially about Clang-style vectors. Check
// if superR is a vector and if so return Unknown.
if (const TypedValueRegion *typedSuperR =
dyn_cast<TypedValueRegion>(R->getSuperRegion())) {
if (typedSuperR->getValueType()->isVectorType())
return UnknownVal();
}
}
// FIXME: We also need to take ElementRegions with symbolic indexes into
// account. This case handles both directly accessing an ElementRegion
// with a symbolic offset and accessing fields within an element with
// a symbolic offset.
if (hasSymbolicIndex)
return UnknownVal();
if (!hasPartialLazyBinding)
return UndefinedVal();
}
// All other values are symbolic.
return svalBuilder.getRegionValueSymbolVal(R);
}
SVal RegionStoreManager::getBindingForObjCIvar(RegionBindingsConstRef B,
const ObjCIvarRegion* R) {
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
const MemRegion *superR = R->getSuperRegion();
// Check if the super region has a default binding.
if (const Optional<SVal> &V = B.getDefaultBinding(superR)) {
if (SymbolRef parentSym = V->getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
// Other cases: give up.
return UnknownVal();
}
return getBindingForLazySymbol(R);
}
SVal RegionStoreManager::getBindingForVar(RegionBindingsConstRef B,
const VarRegion *R) {
// Check if the region has a binding.
if (const Optional<SVal> &V = B.getDirectBinding(R))
return *V;
// Lazily derive a value for the VarRegion.
const VarDecl *VD = R->getDecl();
const MemSpaceRegion *MS = R->getMemorySpace();
// Arguments are always symbolic.
if (isa<StackArgumentsSpaceRegion>(MS))
return svalBuilder.getRegionValueSymbolVal(R);
// Is 'VD' declared constant? If so, retrieve the constant value.
if (VD->getType().isConstQualified())
if (const Expr *Init = VD->getInit())
if (Optional<SVal> V = svalBuilder.getConstantVal(Init))
return *V;
// This must come after the check for constants because closure-captured
// constant variables may appear in UnknownSpaceRegion.
if (isa<UnknownSpaceRegion>(MS))
return svalBuilder.getRegionValueSymbolVal(R);
if (isa<GlobalsSpaceRegion>(MS)) {
QualType T = VD->getType();
// Function-scoped static variables are default-initialized to 0; if they
// have an initializer, it would have been processed by now.
// FIXME: This is only true when we're starting analysis from main().
// We're losing a lot of coverage here.
if (isa<StaticGlobalSpaceRegion>(MS))
return svalBuilder.makeZeroVal(T);
if (Optional<SVal> V = getBindingForDerivedDefaultValue(B, MS, R, T)) {
assert(!V->getAs<nonloc::LazyCompoundVal>());
return V.getValue();
}
return svalBuilder.getRegionValueSymbolVal(R);
}
return UndefinedVal();
}
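
A compact, hypothetical sketch touching each branch above (names are illustrative only):

// Hypothetical analyzed code exercising getBindingForVar's cases.
const int kLimit = 42; // const-qualified with an initializer:
                       // getConstantVal(Init) yields 42
int g(int arg) {       // parameters live in StackArgumentsSpaceRegion:
                       // always a fresh symbol
  static int counter;  // function-scoped static: default-initialized to 0
  int uninit;          // stack local with no binding: UndefinedVal
  return arg + kLimit + counter + uninit;
}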
SVal RegionStoreManager::getBindingForLazySymbol(const TypedValueRegion *R) {
// All other values are symbolic.
return svalBuilder.getRegionValueSymbolVal(R);
}
const RegionStoreManager::SValListTy &
RegionStoreManager::getInterestingValues(nonloc::LazyCompoundVal LCV) {
// First, check the cache.
LazyBindingsMapTy::iterator I = LazyBindingsMap.find(LCV.getCVData());
if (I != LazyBindingsMap.end())
return I->second;
// If we don't have a list of values cached, start constructing it.
SValListTy List;
const SubRegion *LazyR = LCV.getRegion();
RegionBindingsRef B = getRegionBindings(LCV.getStore());
// If this region had /no/ bindings at the time, there are no interesting
// values to return.
const ClusterBindings *Cluster = B.lookup(LazyR->getBaseRegion());
if (!Cluster)
return (LazyBindingsMap[LCV.getCVData()] = std::move(List));
SmallVector<BindingPair, 32> Bindings;
collectSubRegionBindings(Bindings, svalBuilder, *Cluster, LazyR,
/*IncludeAllDefaultBindings=*/true);
for (SmallVectorImpl<BindingPair>::const_iterator I = Bindings.begin(),
E = Bindings.end();
I != E; ++I) {
SVal V = I->second;
if (V.isUnknownOrUndef() || V.isConstant())
continue;
if (Optional<nonloc::LazyCompoundVal> InnerLCV =
V.getAs<nonloc::LazyCompoundVal>()) {
const SValListTy &InnerList = getInterestingValues(*InnerLCV);
List.insert(List.end(), InnerList.begin(), InnerList.end());
continue;
}
List.push_back(V);
}
return (LazyBindingsMap[LCV.getCVData()] = std::move(List));
}
NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
const TypedValueRegion *R) {
if (Optional<nonloc::LazyCompoundVal> V =
getExistingLazyBinding(svalBuilder, B, R, false))
return *V;
return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
}
static bool isRecordEmpty(const RecordDecl *RD) {
if (!RD->field_empty())
return false;
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
return CRD->getNumBases() == 0;
return true;
}
SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
const TypedValueRegion *R) {
const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
if (!RD->getDefinition() || isRecordEmpty(RD))
return UnknownVal();
return createLazyBinding(B, R);
}
SVal RegionStoreManager::getBindingForArray(RegionBindingsConstRef B,
const TypedValueRegion *R) {
assert(Ctx.getAsConstantArrayType(R->getValueType()) &&
"Only constant array types can have compound bindings.");
return createLazyBinding(B, R);
}
bool RegionStoreManager::includedInBindings(Store store,
const MemRegion *region) const {
RegionBindingsRef B = getRegionBindings(store);
region = region->getBaseRegion();
// Quick path: if the base is the head of a cluster, the region is live.
if (B.lookup(region))
return true;
// Slow path: if the region is the VALUE of any binding, it is live.
for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI) {
const ClusterBindings &Cluster = RI.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
const SVal &D = CI.getData();
if (const MemRegion *R = D.getAsRegion())
if (R->getBaseRegion() == region)
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// Binding values to regions.
//===----------------------------------------------------------------------===//
StoreRef RegionStoreManager::killBinding(Store ST, Loc L) {
if (Optional<loc::MemRegionVal> LV = L.getAs<loc::MemRegionVal>())
if (const MemRegion* R = LV->getRegion())
return StoreRef(getRegionBindings(ST).removeBinding(R)
.asImmutableMap()
.getRootWithoutRetain(),
*this);
return StoreRef(ST, *this);
}
RegionBindingsRef
RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) {
if (L.getAs<loc::ConcreteInt>())
return B;
// If we get here, the location should be a region.
const MemRegion *R = L.castAs<loc::MemRegionVal>().getRegion();
// Check if the region is a struct region.
if (const TypedValueRegion* TR = dyn_cast<TypedValueRegion>(R)) {
QualType Ty = TR->getValueType();
if (Ty->isArrayType())
return bindArray(B, TR, V);
if (Ty->isStructureOrClassType())
return bindStruct(B, TR, V);
if (Ty->isVectorType())
return bindVector(B, TR, V);
if (Ty->isUnionType())
return bindAggregate(B, TR, V);
}
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) {
// Binding directly to a symbolic region should be treated as binding
// to element 0.
QualType T = SR->getSymbol()->getType();
if (T->isAnyPointerType() || T->isReferenceType())
T = T->getPointeeType();
R = GetElementZeroRegion(SR, T);
}
// Clear out bindings that may overlap with this binding.
RegionBindingsRef NewB = removeSubRegionBindings(B, cast<SubRegion>(R));
return NewB.addBinding(BindingKey::Make(R, BindingKey::Direct), V);
}
RegionBindingsRef
RegionStoreManager::setImplicitDefaultValue(RegionBindingsConstRef B,
const MemRegion *R,
QualType T) {
SVal V;
if (Loc::isLocType(T))
V = svalBuilder.makeNull();
else if (T->isIntegralOrEnumerationType())
V = svalBuilder.makeZeroVal(T);
else if (T->isStructureOrClassType() || T->isArrayType()) {
// Set the default value to a zero constant when it is a structure
// or array. The type doesn't really matter.
V = svalBuilder.makeZeroVal(Ctx.IntTy);
}
else {
// We can't represent values of this type, but we still need to set a value
// to record that the region has been initialized.
// If this assertion ever fires, a new case should be added above -- we
// should know how to default-initialize any value we can symbolicate.
assert(!SymbolManager::canSymbolicate(T) && "This type is representable");
V = UnknownVal();
}
return B.addBinding(R, BindingKey::Default, V);
}
RegionBindingsRef
RegionStoreManager::bindArray(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal Init) {
const ArrayType *AT = cast<ArrayType>(Ctx.getCanonicalType(R->getValueType()));
QualType ElementTy = AT->getElementType();
Optional<uint64_t> Size;
if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(AT))
Size = CAT->getSize().getZExtValue();
// Check if the init expr is a string literal.
if (Optional<loc::MemRegionVal> MRV = Init.getAs<loc::MemRegionVal>()) {
const StringRegion *S = cast<StringRegion>(MRV->getRegion());
// Treat the string as a lazy compound value.
StoreRef store(B.asStore(), *this);
nonloc::LazyCompoundVal LCV = svalBuilder.makeLazyCompoundVal(store, S)
.castAs<nonloc::LazyCompoundVal>();
return bindAggregate(B, R, LCV);
}
// Handle lazy compound values.
if (Init.getAs<nonloc::LazyCompoundVal>())
return bindAggregate(B, R, Init);
if (Init.isUnknown())
return bindAggregate(B, R, UnknownVal());
// Remaining case: explicit compound values.
const nonloc::CompoundVal& CV = Init.castAs<nonloc::CompoundVal>();
nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
uint64_t i = 0;
RegionBindingsRef NewB(B);
for (; Size.hasValue() ? i < Size.getValue() : true; ++i, ++VI) {
// The init list might be shorter than the array length.
if (VI == VE)
break;
const NonLoc &Idx = svalBuilder.makeArrayIndex(i);
const ElementRegion *ER = MRMgr.getElementRegion(ElementTy, Idx, R, Ctx);
if (ElementTy->isStructureOrClassType())
NewB = bindStruct(NewB, ER, *VI);
else if (ElementTy->isArrayType())
NewB = bindArray(NewB, ER, *VI);
else
NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
}
// If the init list is shorter than the array length, set the
// array default value.
if (Size.hasValue() && i < Size.getValue())
NewB = setImplicitDefaultValue(NewB, R, ElementTy);
return NewB;
}
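
A hypothetical example of the short-initializer path, where trailing elements fall back to the implicit default:

// Hypothetical analyzed code: initializer list shorter than the array.
int sumTail() {
  int a[5] = {1, 2};  // elements 0 and 1 are bound explicitly; the loop stops
                      // at VI == VE and setImplicitDefaultValue gives the rest
                      // of 'a' a zero default binding
  return a[3] + a[4]; // both reads resolve to 0 through the default binding
}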
RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal V) {
QualType T = R->getValueType();
assert(T->isVectorType());
const VectorType *VT = T->getAs<VectorType>(); // Use getAs for typedefs.
// Handle lazy compound values and symbolic values.
if (V.getAs<nonloc::LazyCompoundVal>() || V.getAs<nonloc::SymbolVal>())
return bindAggregate(B, R, V);
// We may accidentally get a non-CompoundVal due to imprecise cast logic or
// because we are binding a symbolic struct value. Kill the field values, and
// if the value is symbolic, bind it as a "default" binding.
if (!V.getAs<nonloc::CompoundVal>()) {
return bindAggregate(B, R, UnknownVal());
}
QualType ElemType = VT->getElementType();
nonloc::CompoundVal CV = V.castAs<nonloc::CompoundVal>();
nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
unsigned index = 0, numElements = VT->getNumElements();
RegionBindingsRef NewB(B);
for (; index != numElements; ++index) {
if (VI == VE)
break;
NonLoc Idx = svalBuilder.makeArrayIndex(index);
const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx);
if (ElemType->isArrayType())
NewB = bindArray(NewB, ER, *VI);
else if (ElemType->isStructureOrClassType())
NewB = bindStruct(NewB, ER, *VI);
else
NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
}
return NewB;
}
Optional<RegionBindingsRef>
RegionStoreManager::tryBindSmallStruct(RegionBindingsConstRef B,
const TypedValueRegion *R,
const RecordDecl *RD,
nonloc::LazyCompoundVal LCV) {
FieldVector Fields;
if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
if (Class->getNumBases() != 0 || Class->getNumVBases() != 0)
return None;
for (const auto *FD : RD->fields()) {
if (FD->isUnnamedBitfield())
continue;
// If there are too many fields, or if any of the fields are aggregates,
// just use the LCV as a default binding.
if (Fields.size() == SmallStructLimit)
return None;
QualType Ty = FD->getType();
if (!(Ty->isScalarType() || Ty->isReferenceType()))
return None;
Fields.push_back(FD);
}
RegionBindingsRef NewB = B;
for (FieldVector::iterator I = Fields.begin(), E = Fields.end(); I != E; ++I) {
const FieldRegion *SourceFR = MRMgr.getFieldRegion(*I, LCV.getRegion());
SVal V = getBindingForField(getRegionBindings(LCV.getStore()), SourceFR);
const FieldRegion *DestFR = MRMgr.getFieldRegion(*I, R);
NewB = bind(NewB, loc::MemRegionVal(DestFR), V);
}
return NewB;
}
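
A sketch of the small-struct fast path under the assumption that the hypothetical struct below stays within SmallStructLimit:

// Hypothetical analyzed code: copying a small, all-scalar struct.
struct Pair { int first, second; };
void copyPair(Pair &dst, const Pair &src) {
  dst = src; // instead of one default LazyCompoundVal binding on 'dst',
             // tryBindSmallStruct binds dst.first and dst.second directly,
             // keeping later field reads precise
}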
RegionBindingsRef RegionStoreManager::bindStruct(RegionBindingsConstRef B,
const TypedValueRegion* R,
SVal V) {
if (!Features.supportsFields())
return B;
QualType T = R->getValueType();
assert(T->isStructureOrClassType());
const RecordType* RT = T->getAs<RecordType>();
const RecordDecl *RD = RT->getDecl();
if (!RD->isCompleteDefinition())
return B;
// Handle lazy compound values and symbolic values.
if (Optional<nonloc::LazyCompoundVal> LCV =
V.getAs<nonloc::LazyCompoundVal>()) {
if (Optional<RegionBindingsRef> NewB = tryBindSmallStruct(B, R, RD, *LCV))
return *NewB;
return bindAggregate(B, R, V);
}
if (V.getAs<nonloc::SymbolVal>())
return bindAggregate(B, R, V);
// We may accidentally get a non-CompoundVal due to imprecise cast logic or
// because we are binding a symbolic struct value. Kill the field values, and
// if the value is symbolic, bind it as a "default" binding.
if (V.isUnknown() || !V.getAs<nonloc::CompoundVal>())
return bindAggregate(B, R, UnknownVal());
const nonloc::CompoundVal& CV = V.castAs<nonloc::CompoundVal>();
nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end();
RecordDecl::field_iterator FI, FE;
RegionBindingsRef NewB(B);
for (FI = RD->field_begin(), FE = RD->field_end(); FI != FE; ++FI) {
if (VI == VE)
break;
// Skip any unnamed bitfields to stay in sync with the initializers.
if (FI->isUnnamedBitfield())
continue;
QualType FTy = FI->getType();
const FieldRegion* FR = MRMgr.getFieldRegion(*FI, R);
if (FTy->isArrayType())
NewB = bindArray(NewB, FR, *VI);
else if (FTy->isStructureOrClassType())
NewB = bindStruct(NewB, FR, *VI);
else
NewB = bind(NewB, loc::MemRegionVal(FR), *VI);
++VI;
}
// There may be fewer values in the initializer list than fields in the struct.
if (FI != FE) {
NewB = NewB.addBinding(R, BindingKey::Default,
svalBuilder.makeIntVal(0, false));
}
return NewB;
}
RegionBindingsRef
RegionStoreManager::bindAggregate(RegionBindingsConstRef B,
const TypedRegion *R,
SVal Val) {
// Remove the old bindings, using 'R' as the root of all regions
// we will invalidate. Then add the new binding.
return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val);
}
//===----------------------------------------------------------------------===//
// State pruning.
//===----------------------------------------------------------------------===//
namespace {
class removeDeadBindingsWorker :
public ClusterAnalysis<removeDeadBindingsWorker> {
SmallVector<const SymbolicRegion*, 12> Postponed;
SymbolReaper &SymReaper;
const StackFrameContext *CurrentLCtx;
public:
removeDeadBindingsWorker(RegionStoreManager &rm,
ProgramStateManager &stateMgr,
RegionBindingsRef b, SymbolReaper &symReaper,
const StackFrameContext *LCtx)
: ClusterAnalysis<removeDeadBindingsWorker>(rm, stateMgr, b),
SymReaper(symReaper), CurrentLCtx(LCtx) {}
// Called by ClusterAnalysis.
void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C);
void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
using ClusterAnalysis<removeDeadBindingsWorker>::VisitCluster;
using ClusterAnalysis::AddToWorkList;
bool AddToWorkList(const MemRegion *R);
bool UpdatePostponed();
void VisitBinding(SVal V);
};
}
bool removeDeadBindingsWorker::AddToWorkList(const MemRegion *R) {
const MemRegion *BaseR = R->getBaseRegion();
return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}
void removeDeadBindingsWorker::VisitAddedToCluster(const MemRegion *baseR,
const ClusterBindings &C) {
if (const VarRegion *VR = dyn_cast<VarRegion>(baseR)) {
if (SymReaper.isLive(VR))
AddToWorkList(baseR, &C);
return;
}
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR)) {
if (SymReaper.isLive(SR->getSymbol()))
AddToWorkList(SR, &C);
else
Postponed.push_back(SR);
return;
}
if (isa<NonStaticGlobalSpaceRegion>(baseR)) {
AddToWorkList(baseR, &C);
return;
}
// CXXThisRegion in the current or parent location context is live.
if (const CXXThisRegion *TR = dyn_cast<CXXThisRegion>(baseR)) {
const StackArgumentsSpaceRegion *StackReg =
cast<StackArgumentsSpaceRegion>(TR->getSuperRegion());
const StackFrameContext *RegCtx = StackReg->getStackFrame();
if (CurrentLCtx &&
(RegCtx == CurrentLCtx || RegCtx->isParentOf(CurrentLCtx)))
AddToWorkList(TR, &C);
}
}
void removeDeadBindingsWorker::VisitCluster(const MemRegion *baseR,
const ClusterBindings *C) {
if (!C)
return;
// Mark the symbol for any SymbolicRegion with live bindings as live itself.
// This means we should continue to track that symbol.
if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(baseR))
SymReaper.markLive(SymR->getSymbol());
for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; ++I) {
// Element index of a binding key is live.
SymReaper.markElementIndicesLive(I.getKey().getRegion());
VisitBinding(I.getData());
}
}
void removeDeadBindingsWorker::VisitBinding(SVal V) {
// Is it a LazyCompoundVal? All referenced regions are live as well.
if (Optional<nonloc::LazyCompoundVal> LCS =
V.getAs<nonloc::LazyCompoundVal>()) {
const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS);
for (RegionStoreManager::SValListTy::const_iterator I = Vals.begin(),
E = Vals.end();
I != E; ++I)
VisitBinding(*I);
return;
}
// If V is a region, then add it to the worklist.
if (const MemRegion *R = V.getAsRegion()) {
AddToWorkList(R);
SymReaper.markLive(R);
// All regions captured by a block are also live.
if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(R)) {
BlockDataRegion::referenced_vars_iterator I = BR->referenced_vars_begin(),
E = BR->referenced_vars_end();
for ( ; I != E; ++I)
AddToWorkList(I.getCapturedRegion());
}
}
// Update the set of live symbols.
for (SymExpr::symbol_iterator SI = V.symbol_begin(), SE = V.symbol_end();
SI!=SE; ++SI)
SymReaper.markLive(*SI);
}
bool removeDeadBindingsWorker::UpdatePostponed() {
// See if any postponed SymbolicRegions are actually live now, after
// having done a scan.
bool changed = false;
for (SmallVectorImpl<const SymbolicRegion*>::iterator
I = Postponed.begin(), E = Postponed.end() ; I != E ; ++I) {
if (const SymbolicRegion *SR = *I) {
if (SymReaper.isLive(SR->getSymbol())) {
changed |= AddToWorkList(SR);
*I = nullptr;
}
}
}
return changed;
}
StoreRef RegionStoreManager::removeDeadBindings(Store store,
const StackFrameContext *LCtx,
SymbolReaper& SymReaper) {
RegionBindingsRef B = getRegionBindings(store);
removeDeadBindingsWorker W(*this, StateMgr, B, SymReaper, LCtx);
W.GenerateClusters();
// Enqueue the region roots onto the worklist.
for (SymbolReaper::region_iterator I = SymReaper.region_begin(),
E = SymReaper.region_end(); I != E; ++I) {
W.AddToWorkList(*I);
}
do W.RunWorkList(); while (W.UpdatePostponed());
// We have now scanned the store, marking reachable regions and symbols
// as live. We now remove all the regions that are dead from the store
// as well as update DSymbols with the set of symbols that are now dead.
for (RegionBindingsRef::iterator I = B.begin(), E = B.end(); I != E; ++I) {
const MemRegion *Base = I.getKey();
// If the cluster has been visited, we know the region has been marked.
if (W.isVisited(Base))
continue;
// Remove the dead entry.
B = B.remove(Base);
if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(Base))
SymReaper.maybeDead(SymR->getSymbol());
// Mark all non-live symbols that this binding references as dead.
const ClusterBindings &Cluster = I.getData();
for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
CI != CE; ++CI) {
SVal X = CI.getData();
SymExpr::symbol_iterator SI = X.symbol_begin(), SE = X.symbol_end();
for (; SI != SE; ++SI)
SymReaper.maybeDead(*SI);
}
}
return StoreRef(B.asStore(), *this);
}
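
A hedged sketch of a binding the pass removes; the code and liveness outcome below are hypothetical:

// Hypothetical analyzed code: a binding that dies before the frame exits.
void h(int *out) {
  int tmp = 9;      // a cluster is created for 'tmp' when it is bound
  int result = tmp; // last use of 'tmp': SymReaper then reports it dead, the
                    // worker never visits its cluster, and the loop above
                    // removes its binding from the store
  *out = result;
}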
//===----------------------------------------------------------------------===//
// Utility methods.
//===----------------------------------------------------------------------===//
void RegionStoreManager::print(Store store, raw_ostream &OS,
const char* nl, const char *sep) {
RegionBindingsRef B = getRegionBindings(store);
OS << "Store (direct and default bindings), "
<< B.asStore()
<< " :" << nl;
B.dump(OS, nl);
}
Index: head/contrib/llvm/tools/clang
===================================================================
--- head/contrib/llvm/tools/clang (revision 322854)
+++ head/contrib/llvm/tools/clang (revision 322855)
Property changes on: head/contrib/llvm/tools/clang
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/clang/dist:r322737-322850
Index: head/contrib/llvm/tools/lld
===================================================================
--- head/contrib/llvm/tools/lld (revision 322854)
+++ head/contrib/llvm/tools/lld (revision 322855)
Property changes on: head/contrib/llvm/tools/lld
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/lld/dist:r322737-322850
Index: head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
===================================================================
--- head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp (revision 322854)
+++ head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp (revision 322855)
@@ -1,137 +1,136 @@
//===-- RegisterContextLinux_i386.cpp --------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
#include "RegisterContextLinux_i386.h"
#include "RegisterContextPOSIX_x86.h"
using namespace lldb_private;
using namespace lldb;
struct GPR {
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
uint32_t esi;
uint32_t edi;
uint32_t ebp;
uint32_t eax;
uint32_t ds;
uint32_t es;
uint32_t fs;
uint32_t gs;
uint32_t orig_eax;
uint32_t eip;
uint32_t cs;
uint32_t eflags;
uint32_t esp;
uint32_t ss;
};
struct FPR_i386 {
uint16_t fctrl; // FPU Control Word (fcw)
uint16_t fstat; // FPU Status Word (fsw)
- uint8_t ftag; // FPU Tag Word (ftw)
- uint8_t reserved_1; // Reserved
+ uint16_t ftag; // FPU Tag Word (ftw)
uint16_t fop; // Last Instruction Opcode (fop)
union {
struct {
uint64_t fip; // Instruction Pointer
uint64_t fdp; // Data Pointer
} x86_64;
struct {
uint32_t fioff; // FPU IP Offset (fip)
uint32_t fiseg; // FPU IP Selector (fcs)
uint32_t fooff; // FPU Operand Pointer Offset (foo)
uint32_t foseg; // FPU Operand Pointer Selector (fos)
} i386_; // Added _ at the end to avoid an error with gcc defining i386 in
// some cases
} ptr;
uint32_t mxcsr; // MXCSR Register State
uint32_t mxcsrmask; // MXCSR Mask
MMSReg stmm[8]; // 8*16 bytes for each FP-reg = 128 bytes
XMMReg xmm[8]; // 8*16 bytes for each XMM-reg = 128 bytes
uint32_t padding[56];
};
struct UserArea {
GPR regs; // General purpose registers.
int32_t fpvalid; // True if FPU is being used.
FPR_i386 i387; // FPU registers.
uint32_t tsize; // Text segment size.
uint32_t dsize; // Data segment size.
uint32_t ssize; // Stack segment size.
uint32_t start_code; // VM address of text.
uint32_t start_stack; // VM address of stack bottom (top in rsp).
int32_t signal; // Signal causing core dump.
int32_t reserved; // Unused.
uint32_t ar0; // Location of GPR's.
uint32_t fpstate; // Location of FPR's. Should be a FXSTATE *, but this
// has to be 32-bits even on 64-bit systems.
uint32_t magic; // Identifier for core dumps.
char u_comm[32]; // Command causing core dump.
uint32_t u_debugreg[8]; // Debug registers (DR0 - DR7).
};
#define DR_SIZE sizeof(((UserArea *)NULL)->u_debugreg[0])
#define DR_0_OFFSET 0xFC
#define DR_OFFSET(reg_index) (DR_0_OFFSET + (reg_index * 4))
#define FPR_SIZE(reg) sizeof(((FPR_i386 *)NULL)->reg)
//---------------------------------------------------------------------------
// Include RegisterInfos_i386 to declare our g_register_infos_i386 structure.
//---------------------------------------------------------------------------
#define DECLARE_REGISTER_INFOS_I386_STRUCT
#include "RegisterInfos_i386.h"
#undef DECLARE_REGISTER_INFOS_I386_STRUCT
RegisterContextLinux_i386::RegisterContextLinux_i386(
const ArchSpec &target_arch)
: RegisterInfoInterface(target_arch) {
RegisterInfo orig_ax = {"orig_eax",
NULL,
sizeof(((GPR *)NULL)->orig_eax),
(LLVM_EXTENSION offsetof(GPR, orig_eax)),
eEncodingUint,
eFormatHex,
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
nullptr,
0};
d_register_infos.push_back(orig_ax);
}
size_t RegisterContextLinux_i386::GetGPRSize() const { return sizeof(GPR); }
const RegisterInfo *RegisterContextLinux_i386::GetRegisterInfo() const {
switch (m_target_arch.GetMachine()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return g_register_infos_i386;
default:
assert(false && "Unhandled target architecture.");
return NULL;
}
}
uint32_t RegisterContextLinux_i386::GetRegisterCount() const {
return static_cast<uint32_t>(sizeof(g_register_infos_i386) /
sizeof(g_register_infos_i386[0]));
}
uint32_t RegisterContextLinux_i386::GetUserRegisterCount() const {
return static_cast<uint32_t>(k_num_user_registers_i386);
}
const std::vector<lldb_private::RegisterInfo> *
RegisterContextLinux_i386::GetDynamicRegisterInfoP() const {
return &d_register_infos;
}
Index: head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContext_x86.h
===================================================================
--- head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContext_x86.h (revision 322854)
+++ head/contrib/llvm/tools/lldb/source/Plugins/Process/Utility/RegisterContext_x86.h (revision 322855)
@@ -1,359 +1,358 @@
//===-- RegisterContext_x86.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef liblldb_RegisterContext_x86_H_
#define liblldb_RegisterContext_x86_H_
#include <cstddef>
#include <cstdint>
#include "llvm/Support/Compiler.h"
//---------------------------------------------------------------------------
// i386 ehframe, dwarf regnums
//---------------------------------------------------------------------------
// Register numbers seen in eh_frame (eRegisterKindEHFrame) on i386 systems
// (non-Darwin)
//
enum {
ehframe_eax_i386 = 0,
ehframe_ecx_i386,
ehframe_edx_i386,
ehframe_ebx_i386,
// on Darwin esp & ebp are reversed in the eh_frame section for i386 (versus
// dwarf's reg numbering).
// To be specific:
// i386+darwin eh_frame: 4 is ebp, 5 is esp
// i386+everyone else eh_frame: 4 is esp, 5 is ebp
// i386 dwarf: 4 is esp, 5 is ebp
// lldb will get the darwin-specific eh_frame reg numberings from debugserver,
// or the ABI, so we only encode the generally correct 4 == esp, 5 == ebp
// numbers in this generic header.
ehframe_esp_i386,
ehframe_ebp_i386,
ehframe_esi_i386,
ehframe_edi_i386,
ehframe_eip_i386,
ehframe_eflags_i386,
ehframe_st0_i386 = 12,
ehframe_st1_i386,
ehframe_st2_i386,
ehframe_st3_i386,
ehframe_st4_i386,
ehframe_st5_i386,
ehframe_st6_i386,
ehframe_st7_i386,
ehframe_xmm0_i386 = 21,
ehframe_xmm1_i386,
ehframe_xmm2_i386,
ehframe_xmm3_i386,
ehframe_xmm4_i386,
ehframe_xmm5_i386,
ehframe_xmm6_i386,
ehframe_xmm7_i386,
ehframe_mm0_i386 = 29,
ehframe_mm1_i386,
ehframe_mm2_i386,
ehframe_mm3_i386,
ehframe_mm4_i386,
ehframe_mm5_i386,
ehframe_mm6_i386,
ehframe_mm7_i386,
};
// DWARF register numbers (eRegisterKindDWARF)
// Intel's x86 or IA-32
enum {
// General Purpose Registers.
dwarf_eax_i386 = 0,
dwarf_ecx_i386,
dwarf_edx_i386,
dwarf_ebx_i386,
dwarf_esp_i386,
dwarf_ebp_i386,
dwarf_esi_i386,
dwarf_edi_i386,
dwarf_eip_i386,
dwarf_eflags_i386,
// Floating Point Registers
dwarf_st0_i386 = 11,
dwarf_st1_i386,
dwarf_st2_i386,
dwarf_st3_i386,
dwarf_st4_i386,
dwarf_st5_i386,
dwarf_st6_i386,
dwarf_st7_i386,
// SSE Registers
dwarf_xmm0_i386 = 21,
dwarf_xmm1_i386,
dwarf_xmm2_i386,
dwarf_xmm3_i386,
dwarf_xmm4_i386,
dwarf_xmm5_i386,
dwarf_xmm6_i386,
dwarf_xmm7_i386,
// MMX Registers
dwarf_mm0_i386 = 29,
dwarf_mm1_i386,
dwarf_mm2_i386,
dwarf_mm3_i386,
dwarf_mm4_i386,
dwarf_mm5_i386,
dwarf_mm6_i386,
dwarf_mm7_i386,
dwarf_fctrl_i386 = 37, // x87 control word
dwarf_fstat_i386 = 38, // x87 status word
dwarf_mxcsr_i386 = 39,
dwarf_es_i386 = 40,
dwarf_cs_i386 = 41,
dwarf_ss_i386 = 42,
dwarf_ds_i386 = 43,
dwarf_fs_i386 = 44,
dwarf_gs_i386 = 45,
// I believe the ymm registers use the dwarf_xmm%_i386 register numbers and
// then differentiate based on size of the register.
dwarf_bnd0_i386 = 101,
dwarf_bnd1_i386,
dwarf_bnd2_i386,
dwarf_bnd3_i386,
};
//---------------------------------------------------------------------------
// AMD x86_64, AMD64, Intel EM64T, or Intel 64 ehframe, dwarf regnums
//---------------------------------------------------------------------------
// EHFrame and DWARF Register numbers (eRegisterKindEHFrame &
// eRegisterKindDWARF)
// This is the spec I used (as opposed to x86-64-abi-0.99.pdf):
// http://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf
enum {
// GP Registers
dwarf_rax_x86_64 = 0,
dwarf_rdx_x86_64,
dwarf_rcx_x86_64,
dwarf_rbx_x86_64,
dwarf_rsi_x86_64,
dwarf_rdi_x86_64,
dwarf_rbp_x86_64,
dwarf_rsp_x86_64,
// Extended GP Registers
dwarf_r8_x86_64 = 8,
dwarf_r9_x86_64,
dwarf_r10_x86_64,
dwarf_r11_x86_64,
dwarf_r12_x86_64,
dwarf_r13_x86_64,
dwarf_r14_x86_64,
dwarf_r15_x86_64,
// Return Address (RA) mapped to RIP
dwarf_rip_x86_64 = 16,
// SSE Vector Registers
dwarf_xmm0_x86_64 = 17,
dwarf_xmm1_x86_64,
dwarf_xmm2_x86_64,
dwarf_xmm3_x86_64,
dwarf_xmm4_x86_64,
dwarf_xmm5_x86_64,
dwarf_xmm6_x86_64,
dwarf_xmm7_x86_64,
dwarf_xmm8_x86_64,
dwarf_xmm9_x86_64,
dwarf_xmm10_x86_64,
dwarf_xmm11_x86_64,
dwarf_xmm12_x86_64,
dwarf_xmm13_x86_64,
dwarf_xmm14_x86_64,
dwarf_xmm15_x86_64,
// Floating Point Registers
dwarf_st0_x86_64 = 33,
dwarf_st1_x86_64,
dwarf_st2_x86_64,
dwarf_st3_x86_64,
dwarf_st4_x86_64,
dwarf_st5_x86_64,
dwarf_st6_x86_64,
dwarf_st7_x86_64,
// MMX Registers
dwarf_mm0_x86_64 = 41,
dwarf_mm1_x86_64,
dwarf_mm2_x86_64,
dwarf_mm3_x86_64,
dwarf_mm4_x86_64,
dwarf_mm5_x86_64,
dwarf_mm6_x86_64,
dwarf_mm7_x86_64,
// Control and Status Flags Register
dwarf_rflags_x86_64 = 49,
// selector registers
dwarf_es_x86_64 = 50,
dwarf_cs_x86_64,
dwarf_ss_x86_64,
dwarf_ds_x86_64,
dwarf_fs_x86_64,
dwarf_gs_x86_64,
// Floating point control registers
dwarf_mxcsr_x86_64 = 64, // Media Control and Status
dwarf_fctrl_x86_64, // x87 control word
dwarf_fstat_x86_64, // x87 status word
// Upper Vector Registers
dwarf_ymm0h_x86_64 = 67,
dwarf_ymm1h_x86_64,
dwarf_ymm2h_x86_64,
dwarf_ymm3h_x86_64,
dwarf_ymm4h_x86_64,
dwarf_ymm5h_x86_64,
dwarf_ymm6h_x86_64,
dwarf_ymm7h_x86_64,
dwarf_ymm8h_x86_64,
dwarf_ymm9h_x86_64,
dwarf_ymm10h_x86_64,
dwarf_ymm11h_x86_64,
dwarf_ymm12h_x86_64,
dwarf_ymm13h_x86_64,
dwarf_ymm14h_x86_64,
dwarf_ymm15h_x86_64,
// MPX registers
dwarf_bnd0_x86_64 = 126,
dwarf_bnd1_x86_64,
dwarf_bnd2_x86_64,
dwarf_bnd3_x86_64,
// AVX2 Vector Mask Registers
// dwarf_k0_x86_64 = 118,
// dwarf_k1_x86_64,
// dwarf_k2_x86_64,
// dwarf_k3_x86_64,
// dwarf_k4_x86_64,
// dwarf_k5_x86_64,
// dwarf_k6_x86_64,
// dwarf_k7_x86_64,
};
//---------------------------------------------------------------------------
// Generic floating-point registers
//---------------------------------------------------------------------------
struct MMSReg {
uint8_t bytes[10];
uint8_t pad[6];
};
struct XMMReg {
uint8_t bytes[16]; // 128-bits for each XMM register
};
// i387_fxsave_struct
struct FXSAVE {
uint16_t fctrl; // FPU Control Word (fcw)
uint16_t fstat; // FPU Status Word (fsw)
- uint8_t ftag; // FPU Tag Word (ftw)
- uint8_t reserved_1; // Reserved
+ uint16_t ftag; // FPU Tag Word (ftw)
uint16_t fop; // Last Instruction Opcode (fop)
union {
struct {
uint64_t fip; // Instruction Pointer
uint64_t fdp; // Data Pointer
} x86_64;
struct {
uint32_t fioff; // FPU IP Offset (fip)
uint32_t fiseg; // FPU IP Selector (fcs)
uint32_t fooff; // FPU Operand Pointer Offset (foo)
uint32_t foseg; // FPU Operand Pointer Selector (fos)
} i386_; // Added _ at the end to avoid an error with gcc defining i386 in
// some cases
} ptr;
uint32_t mxcsr; // MXCSR Register State
uint32_t mxcsrmask; // MXCSR Mask
MMSReg stmm[8]; // 8*16 bytes for each FP-reg = 128 bytes
XMMReg xmm[16]; // 16*16 bytes for each XMM-reg = 256 bytes
uint8_t padding1[48];
uint64_t xcr0;
uint8_t padding2[40];
};
//---------------------------------------------------------------------------
// Extended floating-point registers
//---------------------------------------------------------------------------
struct YMMHReg {
uint8_t bytes[16]; // 16 * 8 bits for the high bytes of each YMM register
};
struct YMMReg {
uint8_t bytes[32]; // 32 * 8 bits (256 bits) for each YMM register
};
struct YMM {
YMMReg ymm[16]; // assembled from ymmh and xmm registers
};
struct MPXReg {
uint8_t bytes[16]; // MPX 128 bit bound registers
};
struct MPXCsr {
uint8_t bytes[8]; // MPX 64 bit bndcfgu and bndstatus registers (collectively
// BNDCSR state)
};
struct MPX {
MPXReg mpxr[4];
MPXCsr mpxc[2];
};
LLVM_PACKED_START
struct XSAVE_HDR {
uint64_t xstate_bv; // OS enabled xstate mask to determine the extended states
// supported by the processor
uint64_t xcomp_bv; // Mask to indicate the format of the XSAVE area and of
// the XRSTOR instruction
uint64_t reserved1[1];
uint64_t reserved2[5];
};
LLVM_PACKED_END
// x86 extensions to FXSAVE (i.e. for AVX and MPX processors)
LLVM_PACKED_START
struct LLVM_ALIGNAS(64) XSAVE {
FXSAVE i387; // floating point registers typical in i387_fxsave_struct
XSAVE_HDR header; // The xsave_hdr_struct can be used to determine if the
// following extensions are usable
YMMHReg ymmh[16]; // High 16 bytes of each of 16 YMM registers (the low bytes
// are in FXSAVE.xmm for compatibility with SSE)
uint64_t reserved3[16];
MPXReg mpxr[4]; // MPX BNDREG state, containing 128-bit bound registers
MPXCsr mpxc[2]; // MPX BNDCSR state, containing 64-bit BNDCFGU and
// BNDSTATUS registers
};
LLVM_PACKED_END
// Floating-point registers
struct FPR {
// Thread state for the floating-point unit of the processor read by ptrace.
union XSTATE {
FXSAVE fxsave; // Generic floating-point registers.
XSAVE xsave; // x86 extended processor state.
} xstate;
};
//---------------------------------------------------------------------------
// ptrace PTRACE_GETREGSET, PTRACE_SETREGSET structure
//---------------------------------------------------------------------------
struct IOVEC {
void *iov_base; // pointer to XSAVE
size_t iov_len; // sizeof(XSAVE)
};
#endif
Index: head/contrib/llvm/tools/lldb
===================================================================
--- head/contrib/llvm/tools/lldb (revision 322854)
+++ head/contrib/llvm/tools/lldb (revision 322855)
Property changes on: head/contrib/llvm/tools/lldb
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/lldb/dist:r322737-322850
Index: head/contrib/llvm
===================================================================
--- head/contrib/llvm (revision 322854)
+++ head/contrib/llvm (revision 322855)
Property changes on: head/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
Merged /vendor/llvm/dist:r322737-322850
Index: head/lib/clang/include/clang/Basic/Version.inc
===================================================================
--- head/lib/clang/include/clang/Basic/Version.inc (revision 322854)
+++ head/lib/clang/include/clang/Basic/Version.inc (revision 322855)
@@ -1,11 +1,11 @@
/* $FreeBSD$ */
#define CLANG_VERSION 5.0.0
#define CLANG_VERSION_STRING "5.0.0"
#define CLANG_VERSION_MAJOR 5
#define CLANG_VERSION_MINOR 0
#define CLANG_VERSION_PATCHLEVEL 0
#define CLANG_VENDOR "FreeBSD "
-#define SVN_REVISION "311219"
+#define SVN_REVISION "311606"
Index: head/lib/clang/include/lld/Config/Version.inc
===================================================================
--- head/lib/clang/include/lld/Config/Version.inc (revision 322854)
+++ head/lib/clang/include/lld/Config/Version.inc (revision 322855)
@@ -1,8 +1,8 @@
// $FreeBSD$
#define LLD_VERSION 5.0.0
#define LLD_VERSION_STRING "5.0.0"
#define LLD_VERSION_MAJOR 5
#define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "311219"
+#define LLD_REVISION_STRING "311606"
#define LLD_REPOSITORY_STRING "FreeBSD"
Index: head/lib/clang/include/llvm/Support/VCSRevision.h
===================================================================
--- head/lib/clang/include/llvm/Support/VCSRevision.h (revision 322854)
+++ head/lib/clang/include/llvm/Support/VCSRevision.h (revision 322855)
@@ -1,2 +1,2 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "svn-r311219"
+#define LLVM_REVISION "svn-r311606"
